academic/pyCRAC/GTF2-scripts.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

diff -Naur sgrann-pycrac-8792459eeef7/pyCRAC/scripts/pyGTF2bedGraph.py sgrann-pycrac-8792459eeef7-slack/pyCRAC/scripts/pyGTF2bedGraph.py
--- sgrann-pycrac-8792459eeef7/pyCRAC/scripts/pyGTF2bedGraph.py	2017-09-11 15:11:12.000000000 +0100
+++ sgrann-pycrac-8792459eeef7-slack/pyCRAC/scripts/pyGTF2bedGraph.py	2017-09-10 00:47:13.014633000 +0100
@@ -2,7 +2,7 @@
 # not compatible with python 3
 __author__		= "Sander Granneman"
 __copyright__	= "Copyright 2017"
-__version__		= "0.0.7"
+__version__		= "0.0.6"
 __credits__		= ["Sander Granneman"]
 __maintainer__	= "Sander Granneman"
 __email__		= "sgrannem@staffmail.ed.ac.uk"
@@ -80,14 +80,12 @@
 	chromdata = processChromFile(chromlengthfile)
 	###
 	
-	normvalue = 1.0
-	if permillion and normvalue == 1.0:
-		assert data.mapped_reads, "No information on the number of mapped reads could be found in the gtf file. Cannot normalize the data"
-		normvalue = float(data.mapped_reads)/1000000.0
-			
 	while True:
 		lines = data.readLineByLine()
-							# to normalize the data to per million reads
+		normvalue = 1.0
+		if permillion and normvalue == 1.0:
+			assert data.mapped_reads, "No information on the number of mapped reads could be found in the gtf file. Cannot normalize the data"
+			normvalue = float(data.mapped_reads)/1000000.0								# to normalize the data to per million reads
 		if current_chromosome and data.chromosome != current_chromosome or not lines:
 			for strand in chromdict:
 				start = 0
diff -Naur sgrann-pycrac-8792459eeef7/pyCRAC/scripts/pyGTF2sgr.py sgrann-pycrac-8792459eeef7-slack/pyCRAC/scripts/pyGTF2sgr.py
--- sgrann-pycrac-8792459eeef7/pyCRAC/scripts/pyGTF2sgr.py	2017-09-11 15:11:12.000000000 +0100
+++ sgrann-pycrac-8792459eeef7-slack/pyCRAC/scripts/pyGTF2sgr.py	2017-09-10 00:47:13.014633000 +0100
@@ -1,8 +1,8 @@
 #!/usr/bin/python
 # not compatible with python 3
 __author__		= "Sander Granneman"
-__copyright__	= "Copyright 2017"
-__version__		= "0.0.6"
+__copyright__	= "Copyright 2015"
+__version__		= "0.0.5"
 __credits__		= ["Sander Granneman"]
 __maintainer__	= "Sander Granneman"
 __email__		= "sgrannem@staffmail.ed.ac.uk"
@@ -13,7 +13,7 @@
 #	pyGTF2sgr.py
 #
 #
-#	Copyright (c) Sander Granneman 2017
+#	Copyright (c) Sander Granneman 2015
 #	
 #	Permission is hereby granted, free of charge, to any person obtaining a copy
 #	of this software and associated documentation files (the "Software"), to deal
@@ -73,12 +73,11 @@
 	###
 	
 	normvalue = 1.0
-	if permillion:
-		assert data.mapped_reads, "No information on the number of mapped reads could be found in the gtf file. Cannot normalize the data"
-		normvalue = float(data.mapped_reads)/1000000.0
-
 	while True:
 		lines = data.readLineByLine()
+		if permillion and normvalue == 1.0:
+			assert data.mapped_reads, "No information on the number of mapped reads could be found in the gtf file. Cannot normalize the data"
+			normvalue = float(data.mapped_reads)/1000000.0
 		if current_chromosome and data.chromosome != current_chromosome or not lines:
 			for strand in strands:
 				if min_cov:
@@ -118,6 +117,8 @@
 			if data.substitutions: chromdict[data.strand][data.substitutions] += data.number_of_reads
 		elif type == "deletions":
 			if data.deletions: chromdict[data.strand][data.deletions] += data.number_of_reads
+		else:
+			break
 			
 def gtf2dropoffrates(gtffile,chromosomedata,out_files=[],score=False,log=sys.stdout,zeros=False,min_cov=0):
 	"""Produces an sgr output file for hits, substitutions and deletions. It requires, besides data a file containing