aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--SNPParser.sc200
1 files changed, 100 insertions, 100 deletions
diff --git a/SNPParser.sc b/SNPParser.sc
index d3e89e9..6f33c63 100644
--- a/SNPParser.sc
+++ b/SNPParser.sc
@@ -1,107 +1,107 @@
SNPParser{
- var <placement;
- var <comboDict;
- var <file;
- var <fileLength;
- var <userID;
- var <testSet;
+ var <placement;
+ var <comboDict;
+ var <file;
+ var <fileLength;
+ var <userID;
+ var <testSet;
- *new{
- arg aFile, aTestSet = inf;
- ^super.new.init(aFile, aTestSet);
- }
+ *new{
+ arg aFile, aTestSet = inf;
+ ^super.new.init(aFile, aTestSet);
+ }
- init{
- arg aFile, aTestSet;
- this.setFileAndUser(aFile);
- testSet = aTestSet;
- }
+ init{
+ arg aFile, aTestSet;
+ this.setFileAndUser(aFile);
+ testSet = aTestSet;
+ }
- setFileAndUser{//set the file (again) to be parsed
- arg aFile;
- var allSlash, allDot;
- file = aFile;
- allSlash = file.findAll("/");
- allDot = file.findAll(".");
- if(allSlash.notNil,{//if there is no slash in the filename
- userID = file[allSlash[allSlash.size-1]+1..allDot[allDot.size-2]-1].asInt;
- },{
- userID = file[0..allDot[allDot.size-2]-1].asInt;
- });
- userID.postln;
- }
+ setFileAndUser{//set the file (again) to be parsed
+ arg aFile;
+ var allSlash, allDot;
+ file = aFile;
+ allSlash = file.findAll("/");
+ allDot = file.findAll(".");
+ if(allSlash.notNil,{//if there is no slash in the filename
+ userID = file[allSlash[allSlash.size-1]+1..allDot[allDot.size-2]-1].asInt;
+ },{
+ userID = file[0..allDot[allDot.size-2]-1].asInt;
+ });
+ userID.postln;
+ }
- readFile{//parse the file this parser currently is setup with
- var found = false, company = "";
- "Guessing parser.".postln;
- SNPInfo.sCompany.do({
- arg cCompany, i;
- if(file.contains(cCompany.asString),{
- if(SNPInfo.workingSCompany.includes(i),{
- ("Attempting to parse a "++cCompany++" file now.").postln;
- company = cCompany;
- found = true;
- },{
- ("Parsing files from "++cCompany++" is not supported (yet). Get me a coffee to make it work!").postln;
- });
- });
- });
- if(found.not,{
- ("This file might not be supported at all: "++file).postln;
- },{
- switch(company,
- "23andme",{^this.parse23andme},
- {"No no no no no!".postln}
- );
- });
- }
+ readFile{//parse the file this parser currently is setup with
+ var found = false, company = "";
+ "Guessing parser.".postln;
+ SNPInfo.sCompany.do({
+ arg cCompany, i;
+ if(file.contains(cCompany.asString),{
+ if(SNPInfo.workingSCompany.includes(i),{
+ ("Attempting to parse a "++cCompany++" file now.").postln;
+ company = cCompany;
+ found = true;
+ },{
+ ("Parsing files from "++cCompany++" is not supported (yet). Get me a coffee to make it work!").postln;
+ });
+ });
+ });
+ if(found.not,{
+ ("This file might not be supported at all: "++file).postln;
+ },{
+ switch(company,
+ "23andme",{^this.parse23andme},
+ {"No no no no no!".postln}
+ );
+ });
+ }
- parse23andme{//parsing a 23andme file: id, chromosome, position, base
- var snpFile = File(file, "r"), line = "", counter = 0.0, tmp, snp, newSameCounter = 0.0;
- fileLength = (("wc -l "++file.shellQuote).unixCmdGetStdOut).delimit({|ch| ch.isSpace});
- ("File "++file++" is "++fileLength[0]++" lines long.").postln;
- "Parsing might require some minutes! So kick back an get a coffee, while it lasts.".postln;
- comboDict = SNPDict.new(fileLength[0], userID);
- if(snpFile.isOpen,{
- protect{
- while{(line = snpFile.getLine).notNil}{//FIXME: Reinsert testset
- if(line[0].asString!="#",{//skip commented lines
- tmp = line.delimit({|ch| ch.isSpace});//delimit the line by space and/or tab
- if(tmp[3].asString!="--" && (SNPInfo.isBasePair(tmp[3]) || SNPInfo.isBase(tmp[3]) && (SNPInfo.chromosomesLength[SNPInfo.convertChromosome(tmp[1])-1]>=tmp[2].asFloat)),{//skip empty SNPs and make sure it's either a single base or a base pair and ignore out-of-range SNPs (yes, science is unclear!)
- if(SNPInfo.isBasePair(tmp[3]),{//if it's a base pair, set it up
- snp = SNP.new(tmp[1], tmp[2], tmp[0], tmp[3], SNPInfo.createResolverForPair(tmp[3]));
- },{
- if(SNPInfo.isBase(tmp[3]), {//if it's a single base, set it up
- snp = SNP.new(tmp[1], tmp[2], tmp[0], tmp[3], \none);
- });
- });
- newSameCounter = newSameCounter + comboDict.storeSNP(snp, SNPInfo.calcPosition(snp.chromosome, snp.position));
- switch(newSameCounter,
- 1.0,{"Storing SNPs now: \n==========".postln;},
- 100000.0,{"=".post;},
- 200000.0,{"=".post;},
- 300000.0,{"=".post;},
- 400000.0,{"=".post;},
- 500000.0,{"=".post;},
- 600000.0,{"=".post;},
- 700000.0,{"=".post;},
- 800000.0,{"=".post;},
- 900000.0,{"=".post;},
- );
- });
- });
- counter = counter + 1;
- };
- }{
- snpFile.close;
- };
- },{
- ("Couldn't open file for reading: "++file).warn;
- });
- "=".postln;
- "Sorting lookup tables. This will also take some time!".postln;
- comboDict.orderLookup(2);
- "Done sorting lookup tables.".postln;
- ^comboDict;
- }
+ parse23andme{//parsing a 23andme file: id, chromosome, position, base
+ var snpFile = File(file, "r"), line = "", counter = 0.0, tmp, snp, newSameCounter = 0.0;
+ fileLength = (("wc -l "++file.shellQuote).unixCmdGetStdOut).delimit({|ch| ch.isSpace});
+ ("File "++file++" is "++fileLength[0]++" lines long.").postln;
+ "Parsing might require some minutes! So kick back an get a coffee, while it lasts.".postln;
+ comboDict = SNPDict.new(fileLength[0], userID);
+ if(snpFile.isOpen,{
+ protect{
+ while{(line = snpFile.getLine).notNil}{//FIXME: Reinsert testset
+ if(line[0].asString!="#",{//skip commented lines
+ tmp = line.delimit({|ch| ch.isSpace});//delimit the line by space and/or tab
+ if(tmp[3].asString!="--" && (SNPInfo.isBasePair(tmp[3]) || SNPInfo.isBase(tmp[3]) && (SNPInfo.chromosomesLength[SNPInfo.convertChromosome(tmp[1])-1]>=tmp[2].asFloat)),{//skip empty SNPs and make sure it's either a single base or a base pair and ignore out-of-range SNPs (yes, science is unclear!)
+ if(SNPInfo.isBasePair(tmp[3]),{//if it's a base pair, set it up
+ snp = SNP.new(tmp[1], tmp[2], tmp[0], tmp[3], SNPInfo.createResolverForPair(tmp[3]));
+ },{
+ if(SNPInfo.isBase(tmp[3]), {//if it's a single base, set it up
+ snp = SNP.new(tmp[1], tmp[2], tmp[0], tmp[3], \none);
+ });
+ });
+ newSameCounter = newSameCounter + comboDict.storeSNP(snp, SNPInfo.calcPosition(snp.chromosome, snp.position));
+ switch(newSameCounter,
+ 1.0,{"Storing SNPs now: \n==========".postln;},
+ 100000.0,{"=".post;},
+ 200000.0,{"=".post;},
+ 300000.0,{"=".post;},
+ 400000.0,{"=".post;},
+ 500000.0,{"=".post;},
+ 600000.0,{"=".post;},
+ 700000.0,{"=".post;},
+ 800000.0,{"=".post;},
+ 900000.0,{"=".post;},
+ );
+ });
+ });
+ counter = counter + 1;
+ };
+ }{
+ snpFile.close;
+ };
+ },{
+ ("Couldn't open file for reading: "++file).warn;
+ });
+ "=".postln;
+ "Sorting lookup tables. This will also take some time!".postln;
+ comboDict.orderLookup(2);
+ "Done sorting lookup tables.".postln;
+ ^comboDict;
+ }
}