-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintensity.py
291 lines (235 loc) · 11.1 KB
/
intensity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
import numpy as np
from PIL import Image
from argparse import ArgumentParser
import pandas as pd
import os
def collect_args():
    """
    Sets up and collects necessary arguments using ArgumentParser.

    All eight arguments are positional and required, in this order:
    ``dir root row_start row_end col_start col_end sum out``. Only the integer
    conversion of the four ROI bounds is enforced here; the range checks
    described in the help texts are performed later by `prelim_check_args()`
    and `second_check_args()`.

    Returns
    -------
    args : argparse.Namespace
        Namespace containing the arguments and their values specified by the user.
    """
    parser = ArgumentParser('python intensity.py')

    # location of the input files
    parser.add_argument(
        'dir', help='path to the directory where files matching root will be passed into program')
    parser.add_argument(
        'root', help='root name of files whose sums will be averaged over (case-sensitive)')

    # region of interest (ROI) bounds
    parser.add_argument('row_start', type=int,
                        help='starting row, must be an integer in [1,row_end]')
    parser.add_argument(
        'row_end', type=int, help='ending row, must be an integer in [row_start,number of rows in image]')
    parser.add_argument('col_start', type=int,
                        help='starting column, must be an integer in [1,col_end]')
    # the lower bound of col_end is col_start (the help text used to say row_start)
    parser.add_argument(
        'col_end', type=int, help='ending column, must be an integer in [col_start,number of columns in image]')

    # computation type and output location
    parser.add_argument(
        'sum', help='whether you are summing over rows \"r\" or columns \"c\"')
    parser.add_argument(
        'out', help='path for the output file. for outputfile format, see docs for save_avgs()')

    return parser.parse_args()
def prelim_check_args(args):
    """
    Validate the arguments that can be checked without opening any image.

    The checks run in this order and stop at the first failure:

    1. `args.row_start` lies in `[1, args.row_end]`
    2. `args.col_start` lies in `[1, args.col_end]`
    3. `args.sum` is either `'r'` or `'c'`

    Parameters
    ----------
    args : argparse.Namespace
        Namespace of arguments returned by `collect_args()`

    Raises
    ------
    ValueError
        If one of the three conditions does not hold; the message names the
        offending value.
    """
    first_row, last_row = args.row_start, args.row_end
    if not 1 <= first_row <= last_row:
        raise ValueError(
            'row_start=%d is not in [1, row_end=%d]' % (first_row, last_row))

    first_col, last_col = args.col_start, args.col_end
    if not 1 <= first_col <= last_col:
        raise ValueError(
            'col_start=%d is not in [1, col_end=%d]' % (first_col, last_col))

    if args.sum != 'r' and args.sum != 'c':
        raise ValueError(
            '\"%s\" is not a valid input, must be \"r\" for rows or \"c\" for columns' % (args.sum))
def second_check_args(fname, args):
    """
    Secondary check of arguments. Opens a particular image file and checks if:
    1. `args.row_end` <= number of rows in the image
    2. `args.col_end` <= number of columns in the image

    Parameters
    ----------
    fname : str
        Name of the image file used to check conditions 1-2. It should be contained in `args.dir`.
    args : argparse.Namespace
        Namespace of arguments returned by `collect_args()`

    Raises
    ------
    ValueError
        If any of conditions 1-2 are not met. An appropriate error message is included.
    FileNotFoundError
        If a path to an image does not exist
    """
    fp = os.path.join(args.dir, fname)

    # Only the dimensions are needed, so read them from the opened image
    # instead of decoding all pixel data into an array. The context manager
    # closes the underlying file handle, which was previously left open.
    with Image.open(fp) as img:
        # PIL reports size as (width, height) == (columns, rows)
        n_cols, n_rows = img.size

    if args.row_end > n_rows:
        raise ValueError('row_end=%d is not in [row_start,number of rows in image=%d]' % (
            args.row_end, n_rows))
    if args.col_end > n_cols:
        raise ValueError('col_end=%d is not in [col_start,number of columns in image=%d]' % (
            args.col_end, n_cols))
def rmv_extra_channels(img):
    """
    Reduce a multi-channel image to a single channel by keeping channel 0.

    If a greyscale image is stored as some color image, it will have 3 (RGB)
    color channels. Since we assume it is really greyscale, all the channels
    will have equal value, so only the first one is kept.

    Parameters
    ----------
    img : np.ndarray
        A numpy array that represents a colored image of dimensions (nr,nc,3)

    Returns
    -------
    gs_img : np.ndarray
        A new float64 array of dimensions (nr,nc) where `gs_img[i,j]` is `img[i,j,0]`
    """
    # A vectorized slice replaces the per-pixel Python loop. Casting to float64
    # keeps the dtype the loop-based version produced (it filled an
    # `np.zeros` array) and yields an independent copy rather than a view.
    gs_img = np.array(img[:, :, 0], dtype=np.float64)
    return gs_img
def compute_img_intensities(fname, args, shape):
    """
    Computes the intensities for a particular greyscale image. If it is stored as a color image, then extra channels are removed using `rmv_extra_channels()`.

    Parameters
    ----------
    fname : str
        Name of the image file
    args : argparse.Namespace
        Namespace of arguments returned by `collect_args()`
    shape : tuple
        Shape that the image should have. Ignored if `None`.

    Returns
    -------
    intensities : np.ndarray
        Numpy array with intensities summed over rows or columns based on user argument. It will be sized appropriately according to value of `args.sum`.
    shape : tuple
        Shape of image read in from `fname` in `args.dir`.

    Raises
    ------
    AssertionError
        If the image stored at `fname` in `args.dir` does not have shape `shape` (for non `None` `shape`).
    FileNotFoundError
        If a path to an image does not exist
    """
    # get full path to image, load it into Numpy array; the context manager
    # closes the file handle once the pixel data has been copied out
    fp = os.path.join(args.dir, fname)
    with Image.open(fp) as handle:
        img = np.asarray(handle)

    # save its shape for return, but first check if its num rows, num cols match required
    # (raised explicitly rather than with `assert` so the check survives `python -O`)
    curr_shape = img.shape
    if shape is not None and curr_shape[:2] != shape[:2]:
        raise AssertionError('Image %s (rows=%d, cols=%d) in %s does not match %d rows, %d columns from prev. files' % (
            fname, curr_shape[0], curr_shape[1], args.dir, shape[0], shape[1]))

    # remove extra channels if there is a 3rd dimension in our image (that means it is being stored as a color image)
    if img.ndim > 2:
        print("%s has %d color channels, using only the first" %
              (fname, img.shape[2]))
        img = rmv_extra_channels(img)

    # only get ROI (1-based, inclusive bounds -> 0-based half-open slice)
    roi = img[args.row_start - 1:args.row_end,
              args.col_start - 1:args.col_end]

    # 'r' sums over the rows (axis 0), giving one value per ROI column;
    # otherwise sum over the columns (axis 1), giving one value per ROI row
    intensities = np.sum(roi, axis=0 if args.sum == 'r' else 1)
    return intensities, curr_shape
def compute_avg_intensities(args):
    """
    For a set of files in directory `args.dir` that start with root `args.root`, find the average intensities across the region of interest specified by `args.row_start`, `args.row_end`, `args.col_start`, and `args.col_end`.

    Directory entries that are not files (e.g. sub-directories), or whose name
    does not start with `args.root`, are skipped with a console message.

    Parameters
    ----------
    args : argparse.Namespace
        Namespace of arguments returned by `collect_args()`

    Returns
    -------
    avgs : np.ndarray
        Average intensities for each column if `args.sum == 'r'` or for each row if `args.sum == 'c'` in the ROIs for the file set described above. None if there are no files in `args.dir` that start with `args.root`.

    Raises
    ------
    ValueError
        See `second_check_args()`
    AssertionError
        See `compute_img_intensities()`
    FileNotFoundError
        If `args.dir` does not exist; see also `compute_img_intensities()`
    """
    # will collect intensities computed from each checked file in dir
    dir_intensities = []

    # shape each image needs to be: unknown (None) until the first matching file
    # has been processed, afterwards compared against every following file
    # inside compute_img_intensities
    shape = None

    # the context manager releases the directory iterator even if a check fails
    with os.scandir(args.dir) as entries:
        for f in entries:
            # skip those not matching root (case-sensitive check here) and
            # anything that is not a file, which cannot be opened as an image
            if not f.name.startswith(args.root) or not f.is_file():
                print("Skipping %s in %s" % (f.name, args.dir))
                continue

            # shape is still None only for the first matching file, which is
            # the one used for the secondary ROI check
            if shape is None:
                second_check_args(f.name, args)

            print("Computing intensities for %s in %s" % (f.name, args.dir))
            intensities, shape = compute_img_intensities(f.name, args, shape)
            dir_intensities.append(intensities)

    # stack saved intensities into matrix, then average them for each column in the matrix
    # note, this is independent of whether we are summing by rows/columns in ROI
    print("\nComputing mean intensities for %d files" %
          (len(dir_intensities)))
    if not dir_intensities:
        return None
    return np.mean(np.array(dir_intensities), axis=0)
def save_avgs(avgs, args):
    """
    Write the averages from `compute_avg_intensities()` to `args.out` as a two-column CSV.

    * With `sum == "r"` the first column is titled `column` and lists the ROI columns `col_start..col_end`.
    * Otherwise the first column is titled `row` and lists the ROI rows `row_start..row_end`.

    In both cases the second column is titled `average_intensity` and holds the
    average belonging to that row/column.

    Parameters
    ----------
    avgs : np.ndarray
        Averages returned by `compute_avg_intensities()`
    args : argparse.Namespace
        Namespace of arguments returned by `collect_args()`
    """
    # which ROI axis the averages are indexed by depends on the summing direction
    if args.sum == 'r':
        label, first, last = 'column', args.col_start, args.col_end
    else:
        label, first, last = 'row', args.row_start, args.row_end
    positions = pd.Index(data=range(first, last + 1), name=label)

    # one value per index entry, laid out as a single-column matrix
    column_vector = np.reshape(avgs, (avgs.shape[0], -1))

    table = pd.DataFrame(
        column_vector, index=positions, columns=['average_intensity'])
    table.to_csv(args.out)
def main():
    """
    Main function: collects arguments, conducts preliminary check, computes average intensities and saves them.

    Expected failures are reported on the console instead of propagating:

    * `ValueError` is printed as an "Argument error" (bad ROI bounds or sum type)
    * `AssertionError` is printed as a "Data error" (images of differing size)
    * `OSError`, which includes `FileNotFoundError`, is printed as a
      "Path / Directory error" (unusable input directory, input file or output path)

    If no file in the directory matches the root, nothing is written and a
    message saying so is printed.
    """
    print("-" * 100)
    try:
        args = collect_args()
        print("Location Arguments:\ndir=%s\nroot=%s\nout=%s\n\nROI Arguments:\nrows=[%d,%d]\ncolumns=[%d,%d]\n\nComputation Arguments:\nsum type=%s\n" % (
            args.dir, args.root, args.out, args.row_start, args.row_end, args.col_start, args.col_end, args.sum))
        prelim_check_args(args)
        avgs = compute_avg_intensities(args)
        if avgs is not None:
            save_avgs(avgs, args)
        else:
            # make the "nothing was written" outcome explicit for the user
            print("No files in %s start with root %s, no output written" %
                  (args.dir, args.root))
    except ValueError as ve:
        print("Argument error: %s\nFixes: check ROI, sum type arguments" % (str(ve)))
    except AssertionError as ae:
        print("Data error: %s\nFixes: make sure all of your files have the same number of rows, columns" % (str(ae)))
    except OSError as fe:
        # OSError rather than only FileNotFoundError: writing the CSV into a
        # missing directory can surface as a plain OSError, and directory or
        # permission problems are OSError subclasses as well
        print("Path / Directory error:%s\nFixes: check dir, out paths (need directory of output file to exist)" % (str(fe)))
    print("-" * 100)
# Run the command-line workflow only when this file is executed as a script,
# so that importing the module has no side effects.
if __name__ == '__main__':
    main()