From 903d2ec99864616c288b99dfd1597665ceb5f542 Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Sat, 29 Apr 2017 16:33:04 -0500 Subject: [PATCH 01/22] Started the 3D generator model --- Models.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/Models.py b/Models.py index fd14f39..b3443b8 100644 --- a/Models.py +++ b/Models.py @@ -84,4 +84,15 @@ def Build(self): self.Model=Model(MInputs,modelT) - +class Model2DViewsTo3D(ModelWrapper): + def __init__(self, Name, View1, View2, Voxels, **kwargs): + super(MergerModel, self).__init__(Name,**kwargs) + self.View1 = View1 + self.View2 = View2 + self.N_Classes = Voxels[0]*Voxels[1]*Voxels[2] + self.MetaData.update({"N_classes": self.N_classes, + "init": self.init}) + + def Build(self): + modelT = concatenate([self.View1, self.View2]) + modelT = Dense(self.N_Classes, activation='softmax',kernel_initializer=self.init)(modelT) From 06cf1c38648b601241f4fcfe4fe7a3bb52afe246 Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Tue, 2 May 2017 01:34:00 -0500 Subject: [PATCH 02/22] More progress. Made a separate experiment file for 3D reconstruction. --- 3DReconstructionExperiment.py | 229 ++++++++++++++++++++++++++++++++++ ClassificationExperiment.py | 35 +----- ClassificationScanConfig.py | 33 +++++ Models.py | 55 ++++++-- 4 files changed, 310 insertions(+), 42 deletions(-) create mode 100644 3DReconstructionExperiment.py diff --git a/3DReconstructionExperiment.py b/3DReconstructionExperiment.py new file mode 100644 index 0000000..255ade5 --- /dev/null +++ b/3DReconstructionExperiment.py @@ -0,0 +1,229 @@ +import sys,os,argparse + +# Parse the Arguments +execfile("LArTPCDNN/ClassificationArguments.py") + +# Process the ConfigFile +execfile(ConfigFile) + +# Load the Data. TODO + +from LArTPCDNN.LoadData import * + +# TrainSampleList,TestSampleList=DivideFiles(FileSearch,[float(NSamples)/MaxEvents,float(NTestSamples)/MaxEvents], + # datasetnames=[u'features'], + # Particles=Particles) + +# # Figure out the output shape... This is not necessary. But the automatic mechanism is inefficient. 
+# if ScanWindowSize>0: +# # shapes=[(BatchSize*multiplier, 2, 240, ScanWindowSize), (BatchSize*multiplier, NClasses)] + # shapes=[(BatchSize*multiplier, 240, ScanWindowSize), + # (BatchSize*multiplier, 240, ScanWindowSize), + # (BatchSize*multiplier, NClasses)] + # viewshape=(None, 240, ScanWindowSize) +# else: + # shapes=[(BatchSize*multiplier, 240, 4096/DownSampleSize), + # (BatchSize*multiplier, 240, 4096/DownSampleSize), + # (BatchSize*multiplier, NClasses)] + + # viewshape=(None, 240, 4096/DownSampleSize) + +# def MakeGenerator(SampleList,NSamples, + # cachefile="LArIAT-LoadDataTest-Cache.h5",**kwargs): + + # return DLMultiClassFilterGenerator(TrainSampleList, FilterEnergy(EnergyCut), max=NSamples, + # preprocessfunction=ProcessWireData(DownSampleSize,ScanWindowSize,Normalize), + # postprocessfunction=MergeInputs(), + # batchsize=BatchSize, + # shapes=shapes, + # n_threads=n_threads, + # multiplier=multiplier, + # cachefile=cachefile, + # **kwargs) + +# # Use DLGenerators to read data +# Train_genC = MakeGenerator(TrainSampleList, NSamples, + # cachefile="/tmp/LArTPCDNN-LArIAT-TrainEvent-Cache.h5") + +# Test_genC = MakeGenerator(TestSampleList, NTestSamples, + # cachefile="/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") + +# print "Train Class Index Map:", Train_genC.ClassIndexMap +# #print "Test Class Index Map:", Test_genC.ClassIndexMap + +# Cache=True + +# if Preload: + # print "Caching data in memory for faster processing after first epoch. Hope you have enough memory." + # Train_gen=Train_genC.PreloadGenerator() + # Test_gen=Test_genC.PreloadGenerator() +# elif Cache: + # print "Caching data on disk for faster processing after first epoch. Hope you have enough disk space." + # Train_gen=Train_genC.DiskCacheGenerator(n_threads_cache) + # Test_gen=Test_genC.DiskCacheGenerator(n_threads_cache) +# else: + # Train_gen=Train_genC.Generator() + # Test_gen=Test_genC.Generator() + + +# Build/Load the Model +from DLTools.ModelWrapper import ModelWrapper +from LArTPCDNN.Models import * + +# You can automatically load the latest previous training of this model. +if TestDefaultParam("LoadPreviousModel") and not LoadModel: + print "Looking for Previous Model to load." + ReconstructionModel=ModelWrapper(Name=Name, LoadPrevious=True,OutputBase=OutputBase) + +# You can load a previous model using "-L" option with the model directory. +if LoadModel: + print "Loading Model From:",LoadModel + if LoadModel[-1]=="/": LoadModel=LoadModel[:-1] + ReconstructionModel=ModelWrapper(Name=os.path.basename(LoadModel),InDir=os.path.dirname(LoadModel), + OutputBase=OutputBase) + ReconstructionModel.Load(LoadModel) + +if not ReconstructionModel.Model: + FailedLoad=True +else: + FailedLoad=False + +# Or Build the model from scratch +if FailedLoad: + import keras + print "Building Model...", + + ReconstructionModel=Model2DViewsTo3D(Name, View1, View2, Width, Depth, + BatchSize, NClasses, + init=TestDefaultParam("WeightInitialization",'normal'), + #activation=TestDefaultParam("activation","relu"), + Dropout=TestDefaultParam("DropoutLayers",0.5), + BatchNormalization=TestDefaultParam("BatchNormLayers",False), + OutputBase=OutputBase) + + ReconstructionModel.Build() + print " Done." 
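For orientation, this is roughly the network the two patches above assemble: each wire-plane view is flattened, the two vectors are concatenated, passed through depth dense layers, and finished with a softmax over every voxel of the target volume. A minimal self-contained sketch (the helper name and default shapes are illustrative, not part of the patch; note that the committed Build hands Model the Python builtin input rather than the two Input tensors, so a runnable variant has to pass both):

    from keras.layers import Input, Flatten, Dense, Activation, concatenate
    from keras.models import Model

    def make_two_view_dense(view_shape=(240, 4096), width=256, depth=2,
                            n_classes=240 * 240 * 256, init='normal'):
        # One Input per wire-plane view, flattened and merged into one vector.
        in1 = Input(view_shape)
        in2 = Input(view_shape)
        x = concatenate([Flatten()(in1), Flatten()(in2)])
        x = Activation('relu')(x)
        for _ in xrange(depth):
            x = Dense(width, kernel_initializer=init)(x)
            x = Activation('relu')(x)
        # Softmax over every voxel of the reconstructed volume.
        x = Dense(n_classes, activation='softmax', kernel_initializer=init)(x)
        return Model([in1, in2], x)

At the voxel counts used here (240*240*256, about 14.7M classes) the final Dense layer alone runs to billions of weights for any non-trivial hidden width, which presumably motivates the convolutional variant that appears in patch 07.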
+ + +print "Output Directory:",ReconstructionModel.OutDir +# Store the Configuration Dictionary +ReconstructionModel.MetaData["Configuration"]=Config +if "HyperParamSet" in dir(): + ReconstructionModel.MetaData["HyperParamSet"]=HyperParamSet + +# Print out the Model Summary +ReconstructionModel.Model.summary() + +# Compile The Model +print "Compiling Model." +ReconstructionModel.BuildOptimizer(optimizer,Config) +ReconstructionModel.Compile(Metrics=["accuracy"]) + +# Train +if Train or (RecoverMode and FailedLoad): + print "Training." + # Setup Callbacks + # These are all optional. + from DLTools.CallBacks import TimeStopping, GracefulExit + from keras.callbacks import * + callbacks=[ ] + + # Still testing this... + + if TestDefaultParam("UseGracefulExit",0): + print "Adding GracefulExit Callback." + callbacks.append( GracefulExit() ) + + if TestDefaultParam("ModelCheckpoint",False): + ReconstructionModel.MakeOutputDir() + callbacks.append(ModelCheckpoint(ReconstructionModel.OutDir+"/Checkpoint.Weights.h5", + monitor=TestDefaultParam("monitor","val_loss"), + save_best_only=TestDefaultParam("ModelCheckpoint_save_best_only"), + save_weights_only=TestDefaultParam("ModelCheckpoint_save_weights_only"), + mode=TestDefaultParam("ModelCheckpoint_mode","auto"), + period=TestDefaultParam("ModelCheckpoint_period",1), + verbose=0)) + + if TestDefaultParam("EarlyStopping"): + callbacks.append(keras.callbacks.EarlyStopping(monitor=TestDefaultParam("monitor","val_loss"), + min_delta=TestDefaultParam("EarlyStopping_min_delta",0.01), + patience=TestDefaultParam("EarlyStopping_patience"), + mode=TestDefaultParam("EarlyStopping_mode",'auto'), + verbose=0)) + + + if TestDefaultParam("RunningTime"): + print "Setting Runningtime to",RunningTime,"." + TSCB=TimeStopping(TestDefaultParam("RunningTime",3600*6),verbose=False) + callbacks.append(TSCB) + + + # Don't fill the log files with progress bar. + if sys.flags.interactive: + verbose=1 + else: + verbose=1 # Set to 2 + + print "Evaluating score on test sample..." + score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples/BatchSize) + + print "Initial Score:", score + ReconstructionModel.MetaData["InitialScore"]=score + + ReconstructionModel.History = ReconstructionModel.Model.fit_generator(Train_gen, + steps_per_epoch=(NSamples/BatchSize), + epochs=Epochs, + verbose=verbose, + validation_data=Test_gen, + validation_steps=NTestSamples/BatchSize, + callbacks=callbacks) + + score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples/BatchSize) + + + print "Evaluating score on test sample..." + print "Final Score:", score + ReconstructionModel.MetaData["FinalScore"]=score + + if TestDefaultParam("RunningTime"): + ReconstructionModel.MetaData["EpochTime"]=TSCB.history + + # Store the parameters used for scanning for easier tables later: + for k in Params: + ReconstructionModel.MetaData[k]=Config[k] + + # Save Model + ReconstructionModel.Save() +else: + print "Skipping Training." 
+ +# Analysis +if Analyze: + Test_genC = MakeGenerator(TestSampleList, NTestSamples, + cachefile=Test_genC.cachefilename) #"/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") + + Test_genC.PreloadData(n_threads_cache) + [Test_X_View1, Test_X_View2], Test_Y = MergeInputs()(tuple(Test_genC.D)) + + from DLAnalysis.Classification import MultiClassificationAnalysis + result,NewMetaData=MultiClassificationAnalysis(ReconstructionModel,[Test_X_View1,Test_X_View2], + Test_Y,BatchSize,PDFFileName="ROC", + IndexMap=Test_genC.ClassIndexMap) + + ReconstructionModel.MetaData.update(NewMetaData) + + # Save again, in case Analysis put anything into the Model MetaData + if not sys.flags.interactive: + ReconstructionModel.Save() + else: + print "Warning: Interactive Mode. Use ReconstructionModel.Save() to save Analysis Results." + +# Make sure all of the Generators processes and threads are dead. +# Not necessary... but ensures a graceful exit. +# if not sys.flags.interactive: +# for g in GeneratorClasses: +# try: +# g.StopFiller() +# g.StopWorkers() +# except: +# pass diff --git a/ClassificationExperiment.py b/ClassificationExperiment.py index 4675324..32e0969 100644 --- a/ClassificationExperiment.py +++ b/ClassificationExperiment.py @@ -6,39 +6,6 @@ # Process the ConfigFile execfile(ConfigFile) -# Now put config in the current scope. Must find a prettier way. -if "Config" in dir(): - for a in Config: - exec(a+"="+str(Config[a])) - -# Use "--Test" to run on less events and epochs. -OutputBase="TrainedModels" -if TestMode: - MaxEvents=int(20e3) - NTestSamples=int(20e2) - Epochs=2 - OutputBase+=".Test" - print "Test Mode: Set MaxEvents to",MaxEvents,"and Epochs to", Epochs - -if LowMemMode: - n_threads=1 - multiplier=1 - -# Calculate how many events will be used for training/validation. -NSamples=MaxEvents-NTestSamples - -# Function to help manage optional configurations. Checks and returns -# if an object is in current scope. Return default value if not. -def TestDefaultParam(Config): - def TestParamPrime(param,default=False): - if param in Config: - return eval(param) - else: - return default - return TestParamPrime - -TestDefaultParam=TestDefaultParam(dir()) - # Load the Data from LArTPCDNN.LoadData import * @@ -124,7 +91,7 @@ def MakeGenerator(SampleList,NSamples, FailedLoad=False # Or Build the model from scratch -if not MyModel.Model: +if FailedLoad: import keras print "Building Model...", diff --git a/ClassificationScanConfig.py b/ClassificationScanConfig.py index 3f48398..1a71517 100644 --- a/ClassificationScanConfig.py +++ b/ClassificationScanConfig.py @@ -123,3 +123,36 @@ else: for ii,c in enumerate(Combos): print "Combo["+str(ii)+"]="+str(c) + +# Now put config in the current scope. Must find a prettier way. +if "Config" in dir(): + for a in Config: + exec(a+"="+str(Config[a])) + +# Use "--Test" to run on less events and epochs. +OutputBase="TrainedModels" +if TestMode: + MaxEvents=int(20e3) + NTestSamples=int(20e2) + Epochs=2 + OutputBase+=".Test" + print "Test Mode: Set MaxEvents to",MaxEvents,"and Epochs to", Epochs + +if LowMemMode: + n_threads=1 + multiplier=1 + +# Calculate how many events will be used for training/validation. +NSamples=MaxEvents-NTestSamples + +# Function to help manage optional configurations. Checks and returns +# if an object is in current scope. Return default value if not. 
+def TestDefaultParam(Config): + def TestParamPrime(param,default=False): + if param in Config: + return eval(param) + else: + return default + return TestParamPrime + +TestDefaultParam=TestDefaultParam(dir()) \ No newline at end of file diff --git a/Models.py b/Models.py index b3443b8..3c5c233 100644 --- a/Models.py +++ b/Models.py @@ -85,14 +85,53 @@ def Build(self): self.Model=Model(MInputs,modelT) class Model2DViewsTo3D(ModelWrapper): - def __init__(self, Name, View1, View2, Voxels, **kwargs): + def __init__(self, Name, View1, View2, width=0, depth=0, BatchSize=2048, N_Classes, + init=0, BatchNormalization=False, Dropout=False, **kwargs): super(MergerModel, self).__init__(Name,**kwargs) - self.View1 = View1 - self.View2 = View2 - self.N_Classes = Voxels[0]*Voxels[1]*Voxels[2] - self.MetaData.update({"N_classes": self.N_classes, - "init": self.init}) + + + self.width=width + self.depth=depth + self.init=init + + self.Dropout=Dropout + self.BatchSize=BatchSize + self.BatchNormalization=BatchNormalization + + self.input1_shape = View1.shape + self.input2_shape = View2.shape + self.N_Classes = N_Classes + + self.MetaData.update({ "width":self.width, + "depth":self.depth, + "Dropout":self.Dropout, + "BatchNormalization":BatchNormalization, + "input1_shape":self.input1_shape, + "input2_shape":self.input2_shape, + "N_classes":self.N_classes, + "init":self.init}) def Build(self): - modelT = concatenate([self.View1, self.View2]) - modelT = Dense(self.N_Classes, activation='softmax',kernel_initializer=self.init)(modelT) + input1=Input(self.input1_shape) + input2=Input(self.input2_shape) + input1 = Flatten(input_shape=self.input1_shape)(input1) + input2 = Flatten(input_shape=self.input2_shape)(input2) + modelT = concatenate([input1, input2]) + + #model.add(Dense(self.width,init=self.init)) + modelT = (Activation('relu')(modelT)) + + for i in xrange(0,self.depth): + if self.BatchNormalization: + modelT=BatchNormalization()(modelT) + + modelT=Dense(self.width,kernel_initializer=self.init)(modelT) + modelT=Activation(self.Activation)(modelT) + + if self.Dropout: + modelT=Dropout(self.Dropout)(modelT) + + if not self.NoClassificationLayer: + modelT=Dense(self.N_classes, activation='softmax',kernel_initializer=self.init)(modelT) + + self.Model=Model(input,modelT) From a79f3c806af3cdb81a6d4c8367661562e74f9fa2 Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Tue, 2 May 2017 18:41:35 -0500 Subject: [PATCH 03/22] Got it running --- 3DReconstructionExperiment.py | 6 ++++-- Models.py | 13 ++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/3DReconstructionExperiment.py b/3DReconstructionExperiment.py index 255ade5..718d14e 100644 --- a/3DReconstructionExperiment.py +++ b/3DReconstructionExperiment.py @@ -7,7 +7,6 @@ execfile(ConfigFile) # Load the Data. 
TODO - from LArTPCDNN.LoadData import * # TrainSampleList,TestSampleList=DivideFiles(FileSearch,[float(NSamples)/MaxEvents,float(NTestSamples)/MaxEvents], @@ -93,7 +92,10 @@ import keras print "Building Model...", - ReconstructionModel=Model2DViewsTo3D(Name, View1, View2, Width, Depth, + View1Shape = (240, 4096) + View2Shape = (240, 4096) + + ReconstructionModel=Model2DViewsTo3D(Name, View1Shape, View2Shape, Width, Depth, BatchSize, NClasses, init=TestDefaultParam("WeightInitialization",'normal'), #activation=TestDefaultParam("activation","relu"), diff --git a/Models.py b/Models.py index 3c5c233..151e75e 100644 --- a/Models.py +++ b/Models.py @@ -85,9 +85,9 @@ def Build(self): self.Model=Model(MInputs,modelT) class Model2DViewsTo3D(ModelWrapper): - def __init__(self, Name, View1, View2, width=0, depth=0, BatchSize=2048, N_Classes, + def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, N_Classes=0, init=0, BatchNormalization=False, Dropout=False, **kwargs): - super(MergerModel, self).__init__(Name,**kwargs) + super(Model2DViewsTo3D, self).__init__(Name,**kwargs) self.width=width @@ -98,8 +98,8 @@ def __init__(self, Name, View1, View2, width=0, depth=0, BatchSize=2048, N_Class self.BatchSize=BatchSize self.BatchNormalization=BatchNormalization - self.input1_shape = View1.shape - self.input2_shape = View2.shape + self.input1_shape = View1Shape + self.input2_shape = View2Shape self.N_Classes = N_Classes self.MetaData.update({ "width":self.width, @@ -108,7 +108,7 @@ def __init__(self, Name, View1, View2, width=0, depth=0, BatchSize=2048, N_Class "BatchNormalization":BatchNormalization, "input1_shape":self.input1_shape, "input2_shape":self.input2_shape, - "N_classes":self.N_classes, + "N_classes":self.N_Classes, "init":self.init}) def Build(self): @@ -131,7 +131,6 @@ def Build(self): if self.Dropout: modelT=Dropout(self.Dropout)(modelT) - if not self.NoClassificationLayer: - modelT=Dense(self.N_classes, activation='softmax',kernel_initializer=self.init)(modelT) + modelT=Dense(self.N_Classes, activation='softmax',kernel_initializer=self.init)(modelT) self.Model=Model(input,modelT) From b8584694894de085d475d0e63216a2e8c8d1d55b Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Wed, 3 May 2017 15:56:11 -0500 Subject: [PATCH 04/22] Rename 3D reconstruction experiment and undo changes to afarbin's files. --- ClassificationExperiment.py | 33 +++++++++++ ClassificationScanConfig.py | 33 ----------- Models.py | 51 ---------------- ...periment.py => ReconstructionExperiment.py | 36 ++++++++++++ ReconstructionModels.py | 58 +++++++++++++++++++ 5 files changed, 127 insertions(+), 84 deletions(-) rename 3DReconstructionExperiment.py => ReconstructionExperiment.py (91%) create mode 100644 ReconstructionModels.py diff --git a/ClassificationExperiment.py b/ClassificationExperiment.py index 32e0969..ed5c1b4 100644 --- a/ClassificationExperiment.py +++ b/ClassificationExperiment.py @@ -6,6 +6,39 @@ # Process the ConfigFile execfile(ConfigFile) +# Now put config in the current scope. Must find a prettier way. +if "Config" in dir(): + for a in Config: + exec(a+"="+str(Config[a])) + +# Use "--Test" to run on less events and epochs. +OutputBase="TrainedModels" +if TestMode: + MaxEvents=int(20e3) + NTestSamples=int(20e2) + Epochs=10 + OutputBase+=".Test" + print "Test Mode: Set MaxEvents to",MaxEvents,"and Epochs to", Epochs + +if LowMemMode: + n_threads=1 + multiplier=1 + +# Calculate how many events will be used for training/validation. 
+NSamples=MaxEvents-NTestSamples + +# Function to help manage optional configurations. Checks and returns +# if an object is in current scope. Return default value if not. +def TestDefaultParam(Config): + def TestParamPrime(param,default=False): + if param in Config: + return eval(param) + else: + return default + return TestParamPrime + +TestDefaultParam=TestDefaultParam(dir()) + # Load the Data from LArTPCDNN.LoadData import * diff --git a/ClassificationScanConfig.py b/ClassificationScanConfig.py index 3020204..2273afc 100644 --- a/ClassificationScanConfig.py +++ b/ClassificationScanConfig.py @@ -126,36 +126,3 @@ else: for ii,c in enumerate(Combos): print "Combo["+str(ii)+"]="+str(c) - -# Now put config in the current scope. Must find a prettier way. -if "Config" in dir(): - for a in Config: - exec(a+"="+str(Config[a])) - -# Use "--Test" to run on less events and epochs. -OutputBase="TrainedModels" -if TestMode: - MaxEvents=int(20e3) - NTestSamples=int(20e2) - Epochs=10 - OutputBase+=".Test" - print "Test Mode: Set MaxEvents to",MaxEvents,"and Epochs to", Epochs - -if LowMemMode: - n_threads=1 - multiplier=1 - -# Calculate how many events will be used for training/validation. -NSamples=MaxEvents-NTestSamples - -# Function to help manage optional configurations. Checks and returns -# if an object is in current scope. Return default value if not. -def TestDefaultParam(Config): - def TestParamPrime(param,default=False): - if param in Config: - return eval(param) - else: - return default - return TestParamPrime - -TestDefaultParam=TestDefaultParam(dir()) \ No newline at end of file diff --git a/Models.py b/Models.py index 151e75e..ceb3b68 100644 --- a/Models.py +++ b/Models.py @@ -83,54 +83,3 @@ def Build(self): self.modelT=modelT self.Model=Model(MInputs,modelT) - -class Model2DViewsTo3D(ModelWrapper): - def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, N_Classes=0, - init=0, BatchNormalization=False, Dropout=False, **kwargs): - super(Model2DViewsTo3D, self).__init__(Name,**kwargs) - - - self.width=width - self.depth=depth - self.init=init - - self.Dropout=Dropout - self.BatchSize=BatchSize - self.BatchNormalization=BatchNormalization - - self.input1_shape = View1Shape - self.input2_shape = View2Shape - self.N_Classes = N_Classes - - self.MetaData.update({ "width":self.width, - "depth":self.depth, - "Dropout":self.Dropout, - "BatchNormalization":BatchNormalization, - "input1_shape":self.input1_shape, - "input2_shape":self.input2_shape, - "N_classes":self.N_Classes, - "init":self.init}) - - def Build(self): - input1=Input(self.input1_shape) - input2=Input(self.input2_shape) - input1 = Flatten(input_shape=self.input1_shape)(input1) - input2 = Flatten(input_shape=self.input2_shape)(input2) - modelT = concatenate([input1, input2]) - - #model.add(Dense(self.width,init=self.init)) - modelT = (Activation('relu')(modelT)) - - for i in xrange(0,self.depth): - if self.BatchNormalization: - modelT=BatchNormalization()(modelT) - - modelT=Dense(self.width,kernel_initializer=self.init)(modelT) - modelT=Activation(self.Activation)(modelT) - - if self.Dropout: - modelT=Dropout(self.Dropout)(modelT) - - modelT=Dense(self.N_Classes, activation='softmax',kernel_initializer=self.init)(modelT) - - self.Model=Model(input,modelT) diff --git a/3DReconstructionExperiment.py b/ReconstructionExperiment.py similarity index 91% rename from 3DReconstructionExperiment.py rename to ReconstructionExperiment.py index 718d14e..125d76c 100644 --- a/3DReconstructionExperiment.py +++ 
b/ReconstructionExperiment.py @@ -6,6 +6,42 @@ # Process the ConfigFile execfile(ConfigFile) +# Now put config in the current scope. Must find a prettier way. +if "Config" in dir(): + for a in Config: + exec (a + "=" + str(Config[a])) + +# Use "--Test" to run on less events and epochs. +OutputBase = "TrainedModels" +if TestMode: + MaxEvents = int(20e3) + NTestSamples = int(20e2) + Epochs = 10 + OutputBase += ".Test" + print "Test Mode: Set MaxEvents to", MaxEvents, "and Epochs to", Epochs + +if LowMemMode: + n_threads = 1 + multiplier = 1 + +# Calculate how many events will be used for training/validation. +NSamples = MaxEvents - NTestSamples + + +# Function to help manage optional configurations. Checks and returns +# if an object is in current scope. Return default value if not. +def TestDefaultParam(Config): + def TestParamPrime(param, default=False): + if param in Config: + return eval(param) + else: + return default + + return TestParamPrime + + +TestDefaultParam = TestDefaultParam(dir()) + # Load the Data. TODO from LArTPCDNN.LoadData import * diff --git a/ReconstructionModels.py b/ReconstructionModels.py new file mode 100644 index 0000000..1b06e56 --- /dev/null +++ b/ReconstructionModels.py @@ -0,0 +1,58 @@ +from DLTools.ModelWrapper import * + +from keras.layers.merge import concatenate +from keras.models import Sequential, Model +from keras.layers.core import Dense, Activation +from keras.layers import BatchNormalization,Dropout,Flatten, Input +from keras.models import model_from_json + +class Model2DViewsTo3D(ModelWrapper): + def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, N_Classes=0, + init=0, BatchNormalization=False, Dropout=False, **kwargs): + super(Model2DViewsTo3D, self).__init__(Name,**kwargs) + + + self.width=width + self.depth=depth + self.init=init + + self.Dropout=Dropout + self.BatchSize=BatchSize + self.BatchNormalization=BatchNormalization + + self.input1_shape = View1Shape + self.input2_shape = View2Shape + self.N_Classes = N_Classes + + self.MetaData.update({ "width":self.width, + "depth":self.depth, + "Dropout":self.Dropout, + "BatchNormalization":BatchNormalization, + "input1_shape":self.input1_shape, + "input2_shape":self.input2_shape, + "N_classes":self.N_Classes, + "init":self.init}) + + def Build(self): + input1=Input(self.input1_shape) + input2=Input(self.input2_shape) + input1 = Flatten(input_shape=self.input1_shape)(input1) + input2 = Flatten(input_shape=self.input2_shape)(input2) + modelT = concatenate([input1, input2]) + + #model.add(Dense(self.width,init=self.init)) + modelT = (Activation('relu')(modelT)) + + for i in xrange(0,self.depth): + if self.BatchNormalization: + modelT=BatchNormalization()(modelT) + + modelT=Dense(self.width,kernel_initializer=self.init)(modelT) + modelT=Activation(self.Activation)(modelT) + + if self.Dropout: + modelT=Dropout(self.Dropout)(modelT) + + modelT=Dense(self.N_Classes, activation='softmax',kernel_initializer=self.init)(modelT) + + self.Model=Model(input,modelT) From cca49a98a311458265845344330b2b2c36b2eb03 Mon Sep 17 00:00:00 2001 From: binarysaurus Date: Wed, 3 May 2017 16:06:16 -0500 Subject: [PATCH 05/22] add from next --- LoadData3D.py | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 LoadData3D.py diff --git a/LoadData3D.py b/LoadData3D.py new file mode 100644 index 0000000..72c59a2 --- /dev/null +++ b/LoadData3D.py @@ -0,0 +1,144 @@ +import h5py +import glob,os,sys,time +import numpy as np + +from 
DLTools.ThreadedGenerator import DLMultiClassGenerator,DLMultiClassFilterGenerator + + +def LarTPCDataGeneratorOld(Directory="/data/", batchsize=16, datasets=[u'3DImages'],**kwargs): + + Samples = [ (Directory+"3d/", datasets, "signal" ), + (Directory+"/dnn_NEXT100_Bi214_bg_v2x2x2_r200x200x200.Tensor.h5", datasets, "background" )] + + def filterfunction(batchdict): + r= np.array(range(batchdict["3DImages"].shape[0])) + return r[0] + + + GC= DLMultiClassFilterGenerator(Samples, batchsize=batchsize, FilterFunc=False, + OneHot=True, shapes = [(batchsize, 200,200,200), (batchsize, 2)], **kwargs) + return GC + + +def LarTPCDataGenerator(Directory="/data", batchsize=16, datasets=['images3D/C','images3D/V'], Norm=True, + bins=(240,240,256),**kwargs): + + Samples = [ (Directory+"muon_158.3d.h5", datasets)] + + def MakeImage(bins,Norm=True): + def f(D): + for i in xrange(D[0].shape[0]): + if Norm: + w=np.tanh(np.sign(D[1][i]) * np.log(np.abs(D[1][i]) + 1.0) / 2.0) + else: + w=D[1][i] + R,b=np.histogramdd(D[0][i], bins=list(bins), weights=w) + return [R]+D[2:] + return f + + + GC= DLMultiClassGenerator(Samples, batchsize=batchsize, + preprocessfunction=MakeImage(bins,False), + OneHot=True, + shapes = [(batchsize,)+bins,(batchsize, 2)], + **kwargs) + return GC + + +Test=1 + +if __name__ == '__main__' and Test==0: + import sys + Directory="/data/datasets/LarTPC/apr_9/3d/" + + try: + n_threads=int(sys.argv[1]) + except: + n_threads=6 + + try: + n_threads2=int(sys.argv[2]) + except: + n_threads2=n_threads + + Train_gen=LarTPCDataGeneratorOld(Directory,n_threads=n_threads,max=100000, verbose=False) + + print "Generator Ready" + print "ClassIndex:", Train_gen.ClassIndexMap + print "Object Shape:",Train_gen.shapes + sys.stdout.flush() + + N=1 + NN=n_threads + count=0 + old=start=time.time() + for tries in xrange(1): + print "*********************Try:",tries + #for D in Train_gen.Generator(): + for D in Train_gen.Generator(): + NN-=0 + if NN<0: + break + start1=time.time() + Delta=(start1-start) + Delta2=(start1-old) + old=start1 + print count,":",Delta, ":",Delta/float(N), Delta2 + sys.stdout.flush() + N+=1 + for d in D: + print d.shape + #print d[np.where(d!=0.)] + NN=d.shape[0] + #print d[0] + pass + count+=NN + + +if __name__ == '__main__' and Test==1: + import sys + Directory="/data/datasets/LarTPC/" + + try: + n_threads=int(sys.argv[1]) + except: + n_threads=6 + + try: + n_threads2=int(sys.argv[2]) + except: + n_threads2=n_threads + + Train_gen=LarTPCDataGenerator(Directory,n_threads=n_threads,max=100000, + bins=(100,100,100),verbose=False) + + print "Generator Ready" + print "ClassIndex:", Train_gen.ClassIndexMap + print "Object Shape:",Train_gen.shapes + sys.stdout.flush() + + N=1 + NN=n_threads + count=0 + old=start=time.time() + for tries in xrange(1): + print "*********************Try:",tries + #for D in Train_gen.Generator(): + for D in Train_gen.Generator(): + NN-=0 + if NN<0: + break + start1=time.time() + Delta=(start1-start) + Delta2=(start1-old) + old=start1 + print count,":",Delta, ":",Delta/float(N), Delta2 + sys.stdout.flush() + N+=1 + for d in D: + print d.shape + print d[np.where(d!=0.)] + NN=d.shape[0] + #print d[0] + pass + count+=NN \ No newline at end of file From 0a08f5c0564763215c89c04c9cef04fc020e8622 Mon Sep 17 00:00:00 2001 From: binarysaurus Date: Wed, 3 May 2017 16:13:18 -0500 Subject: [PATCH 06/22] Naming conventions --- ReconstructionExperiment.py => Recon3DExperiment.py | 0 LoadData3D.py => Recon3DLoadData.py | 0 ReconstructionModels.py => Recon3DModels.py | 0 3 files 
changed, 0 insertions(+), 0 deletions(-) rename ReconstructionExperiment.py => Recon3DExperiment.py (100%) rename LoadData3D.py => Recon3DLoadData.py (100%) rename ReconstructionModels.py => Recon3DModels.py (100%) diff --git a/ReconstructionExperiment.py b/Recon3DExperiment.py similarity index 100% rename from ReconstructionExperiment.py rename to Recon3DExperiment.py diff --git a/LoadData3D.py b/Recon3DLoadData.py similarity index 100% rename from LoadData3D.py rename to Recon3DLoadData.py diff --git a/ReconstructionModels.py b/Recon3DModels.py similarity index 100% rename from ReconstructionModels.py rename to Recon3DModels.py From 68ce4f99722362ea960e85ae5f2f531d93776a2e Mon Sep 17 00:00:00 2001 From: pada1 Date: Wed, 3 May 2017 15:46:59 -0700 Subject: [PATCH 07/22] Added 3d Convolutional Model --- Recon3DModels.py | 74 +++++++++++++++++++++++++++++++++++++++++++++-- Recon3DModels.py~ | 58 +++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 Recon3DModels.py~ diff --git a/Recon3DModels.py b/Recon3DModels.py index 1b06e56..ed2e3b3 100644 --- a/Recon3DModels.py +++ b/Recon3DModels.py @@ -5,11 +5,16 @@ from keras.layers.core import Dense, Activation from keras.layers import BatchNormalization,Dropout,Flatten, Input from keras.models import model_from_json +from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Conv3D, UpSampling3D +from keras.models import Model +from keras import backend as K +from keras.callbacks import TensorBoard +import numpy as np -class Model2DViewsTo3D(ModelWrapper): +class Model2DViewsTo3DDense(ModelWrapper): def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, N_Classes=0, init=0, BatchNormalization=False, Dropout=False, **kwargs): - super(Model2DViewsTo3D, self).__init__(Name,**kwargs) + super(Model2DViewsTo3DDense, self).__init__(Name,**kwargs) self.width=width @@ -56,3 +61,68 @@ def Build(self): modelT=Dense(self.N_Classes, activation='softmax',kernel_initializer=self.init)(modelT) self.Model=Model(input,modelT) + +############################################################################################################# + +class Model2DViewsTo3DConv(ModelWrapper): + def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, N_Classes=0, + init=0, BatchNormalization=False, Dropout=False, **kwargs): + super(Model2DViewsTo3DConv, self).__init__(Name,**kwargs) + + + self.width=width + self.depth=depth + self.init=init + + self.Dropout=Dropout + self.BatchSize=BatchSize + self.BatchNormalization=BatchNormalization + + self.input1_shape = View1Shape + self.input2_shape = View2Shape + self.N_Classes = N_Classes + + self.MetaData.update({ "width":self.width, + "depth":self.depth, + "Dropout":self.Dropout, + "BatchNormalization":BatchNormalization, + "input1_shape":self.input1_shape, + "input2_shape":self.input2_shape, + "N_classes":self.N_Classes, + "init":self.init}) + def Build(self): + input1=Input(self.input1_shape) + input2=Input(self.input2_shape) + + input_img = Input(shape=(240,4086, 1)) # adapt this if using `channels_first` image data format + + x = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(input_img) + x = MaxPooling2D((2, 2), padding='same')(x) + x = Conv2D(32, (3, 3), activation='relu', padding='same')(x) + x = MaxPooling2D((2, 2), padding='same')(x) + x = Conv2D(8, (3, 3), activation='relu', padding='same')(x) + encoded1 = MaxPooling2D((2, 2), padding='same')(x) + + y = Conv2D(64, (3, 3), strides=(1, 1), 
activation='relu', padding='same')(input_img) + y = MaxPooling2D((2, 2), padding='same')(y) + y = Conv2D(32, (3, 3), activation='relu', padding='same')(y) + y = MaxPooling2D((2, 2), padding='same')(y) + y = Conv2D(8, (3, 3), activation='relu', padding='same')(y) + encoded2 = MaxPooling2D((2, 2), padding='same')(y) + + #concatenate images + z = concatenate([encoded1, encoded2]) + + #Now decode in 3D + z = Conv3D(8, (3, 3), activation='relu', padding='same')(z) + z = UpSampling3D((2, 2))(z) + z = Conv3D(32, (3, 3), activation='relu', padding='same')(z) + z = UpSampling3D((2, 2))(z) + z = Conv3D(64, (3, 3), activation='relu')(z) + z = UpSampling3D((2, 2))(z) + decoded = Conv3D(1, (3, 3), activation='sigmoid', padding='same')(z) + + autoencoder = Model(inputs=[input1, input2], outputs=decoded) + autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy') + + self.Model=Model(input,modelT) diff --git a/Recon3DModels.py~ b/Recon3DModels.py~ new file mode 100644 index 0000000..1b06e56 --- /dev/null +++ b/Recon3DModels.py~ @@ -0,0 +1,58 @@ +from DLTools.ModelWrapper import * + +from keras.layers.merge import concatenate +from keras.models import Sequential, Model +from keras.layers.core import Dense, Activation +from keras.layers import BatchNormalization,Dropout,Flatten, Input +from keras.models import model_from_json + +class Model2DViewsTo3D(ModelWrapper): + def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, N_Classes=0, + init=0, BatchNormalization=False, Dropout=False, **kwargs): + super(Model2DViewsTo3D, self).__init__(Name,**kwargs) + + + self.width=width + self.depth=depth + self.init=init + + self.Dropout=Dropout + self.BatchSize=BatchSize + self.BatchNormalization=BatchNormalization + + self.input1_shape = View1Shape + self.input2_shape = View2Shape + self.N_Classes = N_Classes + + self.MetaData.update({ "width":self.width, + "depth":self.depth, + "Dropout":self.Dropout, + "BatchNormalization":BatchNormalization, + "input1_shape":self.input1_shape, + "input2_shape":self.input2_shape, + "N_classes":self.N_Classes, + "init":self.init}) + + def Build(self): + input1=Input(self.input1_shape) + input2=Input(self.input2_shape) + input1 = Flatten(input_shape=self.input1_shape)(input1) + input2 = Flatten(input_shape=self.input2_shape)(input2) + modelT = concatenate([input1, input2]) + + #model.add(Dense(self.width,init=self.init)) + modelT = (Activation('relu')(modelT)) + + for i in xrange(0,self.depth): + if self.BatchNormalization: + modelT=BatchNormalization()(modelT) + + modelT=Dense(self.width,kernel_initializer=self.init)(modelT) + modelT=Activation(self.Activation)(modelT) + + if self.Dropout: + modelT=Dropout(self.Dropout)(modelT) + + modelT=Dense(self.N_Classes, activation='softmax',kernel_initializer=self.init)(modelT) + + self.Model=Model(input,modelT) From 52adcda46c1a06cdd1c858cf152bfa8d94e557be Mon Sep 17 00:00:00 2001 From: cloudy Date: Wed, 3 May 2017 19:50:29 -0500 Subject: [PATCH 08/22] fix --- Recon3DLoadData.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index 72c59a2..c0df086 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -23,7 +23,7 @@ def filterfunction(batchdict): def LarTPCDataGenerator(Directory="/data", batchsize=16, datasets=['images3D/C','images3D/V'], Norm=True, bins=(240,240,256),**kwargs): - Samples = [ (Directory+"muon_158.3d.h5", datasets)] + Samples = [ (Directory+"muon_158.3d.h5", datasets, "data")] def MakeImage(bins,Norm=True): def f(D): 
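A note on the convolutional model introduced in patch 07: as committed, Build cannot run. Both encoder branches read from input_img (declared with shape (240, 4086, 1), apparently a typo for 4096) instead of input1 and input2; Conv3D and UpSampling3D are given 2-tuples where Keras expects 3-tuples; the concatenated 2D feature maps go straight into Conv3D without being lifted to a 5D volume; and the closing self.Model=Model(input,modelT) refers to a modelT that never exists in that method. A hedged sketch of one way the apparent intent could be wired up (filter counts follow the patch, but the encoder output shapes, the Reshape target, and the shortened decoder are assumptions, and the output grid would still have to be matched to the generator's (240, 240, 256) bins):

    from keras.layers import (Input, Conv2D, MaxPooling2D, Reshape,
                              concatenate, Conv3D, UpSampling3D)
    from keras.models import Model

    def make_two_view_conv3d(view_shape=(240, 4096, 1)):
        # Each wire-plane view gets its own 2D encoder (separate weights).
        def encode(x):
            x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
            x = MaxPooling2D((2, 2), padding='same')(x)
            x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
            x = MaxPooling2D((2, 2), padding='same')(x)
            x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
            return MaxPooling2D((2, 2), padding='same')(x)   # -> (30, 512, 8)

        in1 = Input(view_shape)
        in2 = Input(view_shape)
        z = concatenate([encode(in1), encode(in2)])          # -> (30, 512, 16)
        z = Reshape((30, 512, 16, 1))(z)                     # lift to a volume for Conv3D
        z = Conv3D(8, (3, 3, 3), activation='relu', padding='same')(z)
        z = UpSampling3D((2, 2, 2))(z)                       # -> (60, 1024, 32, 8)
        decoded = Conv3D(1, (3, 3, 3), activation='sigmoid', padding='same')(z)
        return Model([in1, in2], decoded)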
@@ -97,7 +97,7 @@ def f(D): if __name__ == '__main__' and Test==1: import sys - Directory="/data/datasets/LarTPC/" + Directory="/data/cloud/project/data/apr_9/3d/" try: n_threads=int(sys.argv[1]) @@ -110,7 +110,7 @@ def f(D): n_threads2=n_threads Train_gen=LarTPCDataGenerator(Directory,n_threads=n_threads,max=100000, - bins=(100,100,100),verbose=False) + bins=(240,240,256),verbose=False) print "Generator Ready" print "ClassIndex:", Train_gen.ClassIndexMap From 2da20d08fbf5416343d242802e5d9bcc8a1db64c Mon Sep 17 00:00:00 2001 From: binarysaurus Date: Wed, 3 May 2017 20:38:05 -0500 Subject: [PATCH 09/22] Loads 3D data with generator --- LoadData.py | 11 ++- Recon3DLoadData.py | 201 +++++++++++++++++++++++++-------------------- 2 files changed, 119 insertions(+), 93 deletions(-) diff --git a/LoadData.py b/LoadData.py index b923959..911bd54 100644 --- a/LoadData.py +++ b/LoadData.py @@ -126,6 +126,7 @@ def LArIATDataGenerator(FileSearch="/data/LArIAT/*.h5",DownSampleSize=4, ScanWin Files = glob.glob(FileSearch) print "Found",len(Files),"files." + Files.sort() if MaxFiles!=-1: random.shuffle(Files) @@ -216,8 +217,10 @@ def DivideFiles(FileSearch="/data/LArIAT/h5_files/*.h5",Fractions=[.9,.1],datase if __name__ == '__main__': import sys - FileSearch="/data/LArIAT/h5_files/*.h5" - + #FileSearch="/data/LArIAT/h5_files/*.h5" + + # CHANGE TO /data/cloud/project/data/apr_9/2d/*.h5 + FileSearch="/data/datasets/LarTPC/apr_9/2d/*.h5" try: n_threads=int(sys.argv[1]) except: @@ -240,14 +243,14 @@ def DivideFiles(FileSearch="/data/LArIAT/h5_files/*.h5",Fractions=[.9,.1],datase Normalize=True closefiles=False Train_gen=LArIATDataGenerator(FileSearch=FileSearch, - cachefile="LArIAT-LoadDataTest-Cache.h5", + #cachefile="LArIAT-LoadDataTest-Cache.h5", max=128*10000, batchsize=128, DownSampleSize=DownSampleSize, ScanWindowSize=ScanWindowSize, Norm=Normalize, #shapes=[(128*m, 2, 240, 4096/DownSampleSize), (128*m, 16)], - shapes=[(128*m, 2, 240, ScanWindowSize), (128*m, 16)], + shapes=[(128*m, 240, ScanWindowSize), (128, 240, 256)], n_threads=n_threads, SharedDataQueueSize=1, multiplier=m, diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index c0df086..a4bea50 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -4,53 +4,17 @@ from DLTools.ThreadedGenerator import DLMultiClassGenerator,DLMultiClassFilterGenerator +def main(): + datapath="/data/datasets/LarTPC/apr_9/" -def LarTPCDataGeneratorOld(Directory="/data/", batchsize=16, datasets=[u'3DImages'],**kwargs): - - Samples = [ (Directory+"3d/", datasets, "signal" ), - (Directory+"/dnn_NEXT100_Bi214_bg_v2x2x2_r200x200x200.Tensor.h5", datasets, "background" )] - - def filterfunction(batchdict): - r= np.array(range(batchdict["3DImages"].shape[0])) - return r[0] + #Pull in datafiles + filelist2d = glob.glob(datapath + "2d/*") + filelist3d = glob.glob(datapath + "3d/*") + filelist2d.sort() + filelist3d.sort() + assert len(filelist2d) == len(filelist3d), "Number of 2D and 3D files mismatch!" 
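The heart of LarTPCDataGenerator is the MakeImage preprocessing closure above: np.histogramdd turns each event's variable-length list of 3D hit coordinates into a fixed voxel grid by summing per-hit weights, and the Norm branch first compresses the charges with a sign-preserving tanh(log(...)) so a few large deposits cannot swamp the volume. A toy, self-contained illustration of that step (the random coords and charges are placeholders for one event's entries in D[0] and D[1]):

    import numpy as np

    bins = (240, 240, 256)
    coords = np.random.uniform(0.0, 1.0, size=(1000, 3))   # (N, 3) hit positions
    charges = np.random.normal(0.0, 5.0, size=1000)        # matching per-hit charges

    # Sign-preserving log compression squashed through tanh, as in MakeImage's
    # Norm branch, keeps outlier deposits from dominating the voxel sums.
    w = np.tanh(np.sign(charges) * np.log(np.abs(charges) + 1.0) / 2.0)

    # histogramdd sums the weights of the hits landing in each voxel, producing
    # a fixed (240, 240, 256) volume from a variable-length hit list.
    volume, edges = np.histogramdd(coords, bins=list(bins), weights=w)
    print volume.shape, volume.sum()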
- GC= DLMultiClassFilterGenerator(Samples, batchsize=batchsize, FilterFunc=False, - OneHot=True, shapes = [(batchsize, 200,200,200), (batchsize, 2)], **kwargs) - return GC - - -def LarTPCDataGenerator(Directory="/data", batchsize=16, datasets=['images3D/C','images3D/V'], Norm=True, - bins=(240,240,256),**kwargs): - - Samples = [ (Directory+"muon_158.3d.h5", datasets, "data")] - - def MakeImage(bins,Norm=True): - def f(D): - for i in xrange(D[0].shape[0]): - if Norm: - w=np.tanh(np.sign(D[1][i]) * np.log(np.abs(D[1][i]) + 1.0) / 2.0) - else: - w=D[1][i] - R,b=np.histogramdd(D[0][i], bins=list(bins), weights=w) - return [R]+D[2:] - return f - - - GC= DLMultiClassGenerator(Samples, batchsize=batchsize, - preprocessfunction=MakeImage(bins,False), - OneHot=True, - shapes = [(batchsize,)+bins,(batchsize, 2)], - **kwargs) - return GC - - -Test=1 - -if __name__ == '__main__' and Test==0: - import sys - Directory="/data/datasets/LarTPC/apr_9/3d/" - try: n_threads=int(sys.argv[1]) except: @@ -61,7 +25,8 @@ def f(D): except: n_threads2=n_threads - Train_gen=LarTPCDataGeneratorOld(Directory,n_threads=n_threads,max=100000, verbose=False) + Train_gen=LarTPCDataGenerator(filelist3d, n_threads=n_threads,max=100000, + bins=(240,240,256),verbose=False) print "Generator Ready" print "ClassIndex:", Train_gen.ClassIndexMap @@ -75,6 +40,7 @@ def f(D): for tries in xrange(1): print "*********************Try:",tries #for D in Train_gen.Generator(): + #GENERATOR CALLED HERE, FEED THIS TO OUTPUT FOR 3D for D in Train_gen.Generator(): NN-=0 if NN<0: @@ -88,57 +54,114 @@ def f(D): N+=1 for d in D: print d.shape - #print d[np.where(d!=0.)] + print d[np.where(d!=0.)] NN=d.shape[0] #print d[0] pass count+=NN -if __name__ == '__main__' and Test==1: - import sys - Directory="/data/cloud/project/data/apr_9/3d/" - try: - n_threads=int(sys.argv[1]) - except: - n_threads=6 +def LarTPCDataGenerator(files="/data", batchsize=16, datasets=['images3D/C','images3D/V'], Norm=True, + bins=(240,240,256),**kwargs): + + Samples = [] - try: - n_threads2=int(sys.argv[2]) - except: - n_threads2=n_threads + for F in files: + basename=os.path.basename(F) + ParticleName=basename.split("_")[0] + Samples.append((F,datasets,ParticleName)) + + #Samples = [ (Directory+"muon_158.2d.h5", datasets, "data")] + + def MakeImage(bins,Norm=True): + def f(D): + for i in xrange(D[0].shape[0]): + if Norm: + w=np.tanh(np.sign(D[1][i]) * np.log(np.abs(D[1][i]) + 1.0) / 2.0) + else: + w=D[1][i] + R,b=np.histogramdd(D[0][i], bins=list(bins), weights=w) + return [R]+D[2:] + return f - Train_gen=LarTPCDataGenerator(Directory,n_threads=n_threads,max=100000, - bins=(240,240,256),verbose=False) - - print "Generator Ready" - print "ClassIndex:", Train_gen.ClassIndexMap - print "Object Shape:",Train_gen.shapes - sys.stdout.flush() - N=1 - NN=n_threads - count=0 - old=start=time.time() - for tries in xrange(1): - print "*********************Try:",tries - #for D in Train_gen.Generator(): - for D in Train_gen.Generator(): - NN-=0 - if NN<0: - break - start1=time.time() - Delta=(start1-start) - Delta2=(start1-old) - old=start1 - print count,":",Delta, ":",Delta/float(N), Delta2 - sys.stdout.flush() - N+=1 - for d in D: - print d.shape - print d[np.where(d!=0.)] - NN=d.shape[0] - #print d[0] - pass - count+=NN \ No newline at end of file + GC= DLMultiClassGenerator(Samples, batchsize=batchsize, + preprocessfunction=MakeImage(bins,False), + OneHot=True, + shapes = [(batchsize,)+bins,(batchsize, 2)], + **kwargs) + return GC + + + +# +#def 
LarTPCDataGeneratorOld(Directory="/data/", batchsize=16, datasets=[u'3DImages'],**kwargs): +# +# Samples = [ (Directory+"3d/", datasets, "signal" ), +# (Directory+"/dnn_NEXT100_Bi214_bg_v2x2x2_r200x200x200.Tensor.h5", datasets, "background" )] +# +# def filterfunction(batchdict): +# r= np.array(range(batchdict["3DImages"].shape[0])) +# return r[0] +# +# +# GC= DLMultiClassFilterGenerator(Samples, batchsize=batchsize, FilterFunc=False, +# OneHot=True, shapes = [(batchsize, 200,200,200), (batchsize, 2)], **kwargs) +# return GC + + + + +#Test=1 + +#if __name__ == '__main__' and Test==0: +# import sys +# Directory="/data/datasets/LarTPC/apr_9/3d/" +# +# try: +# n_threads=int(sys.argv[1]) +# except: +# n_threads=6 +# +# try: +# n_threads2=int(sys.argv[2]) +# except: +# n_threads2=n_threads +# +# Train_gen=LarTPCDataGeneratorOld(Directory,n_threads=n_threads,max=100000, verbose=False) +# +# print "Generator Ready" +# print "ClassIndex:", Train_gen.ClassIndexMap +# print "Object Shape:",Train_gen.shapes +# sys.stdout.flush() +# +# N=1 +# NN=n_threads +# count=0 +# old=start=time.time() +# for tries in xrange(1): +# print "*********************Try:",tries +# #for D in Train_gen.Generator(): +# for D in Train_gen.Generator(): +# NN-=0 +# if NN<0: +# break +# start1=time.time() +# Delta=(start1-start) +# Delta2=(start1-old) +# old=start1 +# print count,":",Delta, ":",Delta/float(N), Delta2 +# sys.stdout.flush() +# N+=1 +# for d in D: +# print d.shape +# #print d[np.where(d!=0.)] +# NN=d.shape[0] +# #print d[0] +# pass +# count+=NN + + +if __name__ == '__main__': + main() \ No newline at end of file From 8beecadd051a27437d5720f247e09336a826aec6 Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Thu, 4 May 2017 17:49:10 -0500 Subject: [PATCH 10/22] Formatted the code --- Recon3DExperiment.py | 185 +++++++++++++++++++++---------------------- Recon3DLoadData.py | 113 +++++++++++++------------- Recon3DModels.py | 178 ++++++++++++++++++++--------------------- 3 files changed, 236 insertions(+), 240 deletions(-) diff --git a/Recon3DExperiment.py b/Recon3DExperiment.py index 125d76c..8868e6f 100644 --- a/Recon3DExperiment.py +++ b/Recon3DExperiment.py @@ -1,4 +1,4 @@ -import sys,os,argparse +import sys, os, argparse # Parse the Arguments execfile("LArTPCDNN/ClassificationArguments.py") @@ -43,45 +43,45 @@ def TestParamPrime(param, default=False): TestDefaultParam = TestDefaultParam(dir()) # Load the Data. TODO -from LArTPCDNN.LoadData import * +from LArTPCDNN.LoadData import * # TrainSampleList,TestSampleList=DivideFiles(FileSearch,[float(NSamples)/MaxEvents,float(NTestSamples)/MaxEvents], - # datasetnames=[u'features'], - # Particles=Particles) +# datasetnames=[u'features'], +# Particles=Particles) # # Figure out the output shape... This is not necessary. But the automatic mechanism is inefficient. 
# if ScanWindowSize>0: # # shapes=[(BatchSize*multiplier, 2, 240, ScanWindowSize), (BatchSize*multiplier, NClasses)] - # shapes=[(BatchSize*multiplier, 240, ScanWindowSize), - # (BatchSize*multiplier, 240, ScanWindowSize), - # (BatchSize*multiplier, NClasses)] - # viewshape=(None, 240, ScanWindowSize) +# shapes=[(BatchSize*multiplier, 240, ScanWindowSize), +# (BatchSize*multiplier, 240, ScanWindowSize), +# (BatchSize*multiplier, NClasses)] +# viewshape=(None, 240, ScanWindowSize) # else: - # shapes=[(BatchSize*multiplier, 240, 4096/DownSampleSize), - # (BatchSize*multiplier, 240, 4096/DownSampleSize), - # (BatchSize*multiplier, NClasses)] +# shapes=[(BatchSize*multiplier, 240, 4096/DownSampleSize), +# (BatchSize*multiplier, 240, 4096/DownSampleSize), +# (BatchSize*multiplier, NClasses)] - # viewshape=(None, 240, 4096/DownSampleSize) +# viewshape=(None, 240, 4096/DownSampleSize) # def MakeGenerator(SampleList,NSamples, - # cachefile="LArIAT-LoadDataTest-Cache.h5",**kwargs): - - # return DLMultiClassFilterGenerator(TrainSampleList, FilterEnergy(EnergyCut), max=NSamples, - # preprocessfunction=ProcessWireData(DownSampleSize,ScanWindowSize,Normalize), - # postprocessfunction=MergeInputs(), - # batchsize=BatchSize, - # shapes=shapes, - # n_threads=n_threads, - # multiplier=multiplier, - # cachefile=cachefile, - # **kwargs) +# cachefile="LArIAT-LoadDataTest-Cache.h5",**kwargs): + +# return DLMultiClassFilterGenerator(TrainSampleList, FilterEnergy(EnergyCut), max=NSamples, +# preprocessfunction=ProcessWireData(DownSampleSize,ScanWindowSize,Normalize), +# postprocessfunction=MergeInputs(), +# batchsize=BatchSize, +# shapes=shapes, +# n_threads=n_threads, +# multiplier=multiplier, +# cachefile=cachefile, +# **kwargs) # # Use DLGenerators to read data # Train_genC = MakeGenerator(TrainSampleList, NSamples, - # cachefile="/tmp/LArTPCDNN-LArIAT-TrainEvent-Cache.h5") +# cachefile="/tmp/LArTPCDNN-LArIAT-TrainEvent-Cache.h5") # Test_genC = MakeGenerator(TestSampleList, NTestSamples, - # cachefile="/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") +# cachefile="/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") # print "Train Class Index Map:", Train_genC.ClassIndexMap # #print "Test Class Index Map:", Test_genC.ClassIndexMap @@ -89,16 +89,16 @@ def TestParamPrime(param, default=False): # Cache=True # if Preload: - # print "Caching data in memory for faster processing after first epoch. Hope you have enough memory." - # Train_gen=Train_genC.PreloadGenerator() - # Test_gen=Test_genC.PreloadGenerator() +# print "Caching data in memory for faster processing after first epoch. Hope you have enough memory." +# Train_gen=Train_genC.PreloadGenerator() +# Test_gen=Test_genC.PreloadGenerator() # elif Cache: - # print "Caching data on disk for faster processing after first epoch. Hope you have enough disk space." - # Train_gen=Train_genC.DiskCacheGenerator(n_threads_cache) - # Test_gen=Test_genC.DiskCacheGenerator(n_threads_cache) +# print "Caching data on disk for faster processing after first epoch. Hope you have enough disk space." +# Train_gen=Train_genC.DiskCacheGenerator(n_threads_cache) +# Test_gen=Test_genC.DiskCacheGenerator(n_threads_cache) # else: - # Train_gen=Train_genC.Generator() - # Test_gen=Test_genC.Generator() +# Train_gen=Train_genC.Generator() +# Test_gen=Test_genC.Generator() # Build/Load the Model @@ -108,54 +108,54 @@ def TestParamPrime(param, default=False): # You can automatically load the latest previous training of this model. 
if TestDefaultParam("LoadPreviousModel") and not LoadModel: print "Looking for Previous Model to load." - ReconstructionModel=ModelWrapper(Name=Name, LoadPrevious=True,OutputBase=OutputBase) + ReconstructionModel = ModelWrapper(Name=Name, LoadPrevious=True, OutputBase=OutputBase) # You can load a previous model using "-L" option with the model directory. -if LoadModel: - print "Loading Model From:",LoadModel - if LoadModel[-1]=="/": LoadModel=LoadModel[:-1] - ReconstructionModel=ModelWrapper(Name=os.path.basename(LoadModel),InDir=os.path.dirname(LoadModel), - OutputBase=OutputBase) +if LoadModel: + print "Loading Model From:", LoadModel + if LoadModel[-1] == "/": LoadModel = LoadModel[:-1] + ReconstructionModel = ModelWrapper(Name=os.path.basename(LoadModel), InDir=os.path.dirname(LoadModel), + OutputBase=OutputBase) ReconstructionModel.Load(LoadModel) if not ReconstructionModel.Model: - FailedLoad=True + FailedLoad = True else: - FailedLoad=False + FailedLoad = False # Or Build the model from scratch if FailedLoad: import keras + print "Building Model...", View1Shape = (240, 4096) View2Shape = (240, 4096) - ReconstructionModel=Model2DViewsTo3D(Name, View1Shape, View2Shape, Width, Depth, - BatchSize, NClasses, - init=TestDefaultParam("WeightInitialization",'normal'), - #activation=TestDefaultParam("activation","relu"), - Dropout=TestDefaultParam("DropoutLayers",0.5), - BatchNormalization=TestDefaultParam("BatchNormLayers",False), - OutputBase=OutputBase) + ReconstructionModel = Model2DViewsTo3D(Name, View1Shape, View2Shape, Width, Depth, + BatchSize, NClasses, + init=TestDefaultParam("WeightInitialization", 'normal'), + # activation=TestDefaultParam("activation","relu"), + Dropout=TestDefaultParam("DropoutLayers", 0.5), + BatchNormalization=TestDefaultParam("BatchNormLayers", False), + OutputBase=OutputBase) ReconstructionModel.Build() print " Done." - -print "Output Directory:",ReconstructionModel.OutDir +print "Output Directory:", ReconstructionModel.OutDir # Store the Configuration Dictionary -ReconstructionModel.MetaData["Configuration"]=Config +ReconstructionModel.MetaData["Configuration"] = Config if "HyperParamSet" in dir(): - ReconstructionModel.MetaData["HyperParamSet"]=HyperParamSet + ReconstructionModel.MetaData["HyperParamSet"] = HyperParamSet # Print out the Model Summary ReconstructionModel.Model.summary() # Compile The Model print "Compiling Model." -ReconstructionModel.BuildOptimizer(optimizer,Config) -ReconstructionModel.Compile(Metrics=["accuracy"]) +ReconstructionModel.BuildOptimizer(optimizer, Config) +ReconstructionModel.Compile(Metrics=["accuracy"]) # Train if Train or (RecoverMode and FailedLoad): @@ -164,98 +164,97 @@ def TestParamPrime(param, default=False): # These are all optional. from DLTools.CallBacks import TimeStopping, GracefulExit from keras.callbacks import * - callbacks=[ ] + + callbacks = [] # Still testing this... - if TestDefaultParam("UseGracefulExit",0): + if TestDefaultParam("UseGracefulExit", 0): print "Adding GracefulExit Callback." 
- callbacks.append( GracefulExit() ) + callbacks.append(GracefulExit()) - if TestDefaultParam("ModelCheckpoint",False): + if TestDefaultParam("ModelCheckpoint", False): ReconstructionModel.MakeOutputDir() - callbacks.append(ModelCheckpoint(ReconstructionModel.OutDir+"/Checkpoint.Weights.h5", - monitor=TestDefaultParam("monitor","val_loss"), + callbacks.append(ModelCheckpoint(ReconstructionModel.OutDir + "/Checkpoint.Weights.h5", + monitor=TestDefaultParam("monitor", "val_loss"), save_best_only=TestDefaultParam("ModelCheckpoint_save_best_only"), save_weights_only=TestDefaultParam("ModelCheckpoint_save_weights_only"), - mode=TestDefaultParam("ModelCheckpoint_mode","auto"), - period=TestDefaultParam("ModelCheckpoint_period",1), + mode=TestDefaultParam("ModelCheckpoint_mode", "auto"), + period=TestDefaultParam("ModelCheckpoint_period", 1), verbose=0)) if TestDefaultParam("EarlyStopping"): - callbacks.append(keras.callbacks.EarlyStopping(monitor=TestDefaultParam("monitor","val_loss"), - min_delta=TestDefaultParam("EarlyStopping_min_delta",0.01), + callbacks.append(keras.callbacks.EarlyStopping(monitor=TestDefaultParam("monitor", "val_loss"), + min_delta=TestDefaultParam("EarlyStopping_min_delta", 0.01), patience=TestDefaultParam("EarlyStopping_patience"), - mode=TestDefaultParam("EarlyStopping_mode",'auto'), + mode=TestDefaultParam("EarlyStopping_mode", 'auto'), verbose=0)) - if TestDefaultParam("RunningTime"): - print "Setting Runningtime to",RunningTime,"." - TSCB=TimeStopping(TestDefaultParam("RunningTime",3600*6),verbose=False) + print "Setting Runningtime to", RunningTime, "." + TSCB = TimeStopping(TestDefaultParam("RunningTime", 3600 * 6), verbose=False) callbacks.append(TSCB) - # Don't fill the log files with progress bar. if sys.flags.interactive: - verbose=1 + verbose = 1 else: - verbose=1 # Set to 2 + verbose = 1 # Set to 2 print "Evaluating score on test sample..." - score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples/BatchSize) - + score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples / BatchSize) + print "Initial Score:", score - ReconstructionModel.MetaData["InitialScore"]=score - - ReconstructionModel.History = ReconstructionModel.Model.fit_generator(Train_gen, - steps_per_epoch=(NSamples/BatchSize), - epochs=Epochs, - verbose=verbose, - validation_data=Test_gen, - validation_steps=NTestSamples/BatchSize, - callbacks=callbacks) + ReconstructionModel.MetaData["InitialScore"] = score - score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples/BatchSize) + ReconstructionModel.History = ReconstructionModel.Model.fit_generator(Train_gen, + steps_per_epoch=(NSamples / BatchSize), + epochs=Epochs, + verbose=verbose, + validation_data=Test_gen, + validation_steps=NTestSamples / BatchSize, + callbacks=callbacks) + score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples / BatchSize) print "Evaluating score on test sample..." print "Final Score:", score - ReconstructionModel.MetaData["FinalScore"]=score + ReconstructionModel.MetaData["FinalScore"] = score if TestDefaultParam("RunningTime"): - ReconstructionModel.MetaData["EpochTime"]=TSCB.history + ReconstructionModel.MetaData["EpochTime"] = TSCB.history # Store the parameters used for scanning for easier tables later: for k in Params: - ReconstructionModel.MetaData[k]=Config[k] + ReconstructionModel.MetaData[k] = Config[k] # Save Model ReconstructionModel.Save() else: print "Skipping Training." 
- + # Analysis if Analyze: Test_genC = MakeGenerator(TestSampleList, NTestSamples, - cachefile=Test_genC.cachefilename) #"/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") + cachefile=Test_genC.cachefilename) # "/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") Test_genC.PreloadData(n_threads_cache) [Test_X_View1, Test_X_View2], Test_Y = MergeInputs()(tuple(Test_genC.D)) from DLAnalysis.Classification import MultiClassificationAnalysis - result,NewMetaData=MultiClassificationAnalysis(ReconstructionModel,[Test_X_View1,Test_X_View2], - Test_Y,BatchSize,PDFFileName="ROC", - IndexMap=Test_genC.ClassIndexMap) + + result, NewMetaData = MultiClassificationAnalysis(ReconstructionModel, [Test_X_View1, Test_X_View2], + Test_Y, BatchSize, PDFFileName="ROC", + IndexMap=Test_genC.ClassIndexMap) ReconstructionModel.MetaData.update(NewMetaData) - + # Save again, in case Analysis put anything into the Model MetaData if not sys.flags.interactive: ReconstructionModel.Save() else: print "Warning: Interactive Mode. Use ReconstructionModel.Save() to save Analysis Results." - + # Make sure all of the Generators processes and threads are dead. # Not necessary... but ensures a graceful exit. # if not sys.flags.interactive: diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index a4bea50..cdb5015 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -1,102 +1,99 @@ import h5py -import glob,os,sys,time +import glob, os, sys, time import numpy as np -from DLTools.ThreadedGenerator import DLMultiClassGenerator,DLMultiClassFilterGenerator +from DLTools.ThreadedGenerator import DLMultiClassGenerator, DLMultiClassFilterGenerator + def main(): - datapath="/data/datasets/LarTPC/apr_9/" + datapath = "/data/datasets/LarTPC/apr_9/" - #Pull in datafiles + # Pull in datafiles filelist2d = glob.glob(datapath + "2d/*") filelist3d = glob.glob(datapath + "3d/*") filelist2d.sort() filelist3d.sort() assert len(filelist2d) == len(filelist3d), "Number of 2D and 3D files mismatch!" 
- try: - n_threads=int(sys.argv[1]) + n_threads = int(sys.argv[1]) except: - n_threads=6 + n_threads = 6 try: - n_threads2=int(sys.argv[2]) + n_threads2 = int(sys.argv[2]) except: - n_threads2=n_threads + n_threads2 = n_threads + + Train_gen = LarTPCDataGenerator(filelist3d, n_threads=n_threads, max=100000, + bins=(240, 240, 256), verbose=False) - Train_gen=LarTPCDataGenerator(filelist3d, n_threads=n_threads,max=100000, - bins=(240,240,256),verbose=False) - print "Generator Ready" print "ClassIndex:", Train_gen.ClassIndexMap - print "Object Shape:",Train_gen.shapes + print "Object Shape:", Train_gen.shapes sys.stdout.flush() - - N=1 - NN=n_threads - count=0 - old=start=time.time() + + N = 1 + NN = n_threads + count = 0 + old = start = time.time() for tries in xrange(1): - print "*********************Try:",tries - #for D in Train_gen.Generator(): - #GENERATOR CALLED HERE, FEED THIS TO OUTPUT FOR 3D + print "*********************Try:", tries + # for D in Train_gen.Generator(): + # GENERATOR CALLED HERE, FEED THIS TO OUTPUT FOR 3D for D in Train_gen.Generator(): - NN-=0 - if NN<0: + NN -= 0 + if NN < 0: break - start1=time.time() - Delta=(start1-start) - Delta2=(start1-old) - old=start1 - print count,":",Delta, ":",Delta/float(N), Delta2 + start1 = time.time() + Delta = (start1 - start) + Delta2 = (start1 - old) + old = start1 + print count, ":", Delta, ":", Delta / float(N), Delta2 sys.stdout.flush() - N+=1 + N += 1 for d in D: print d.shape - print d[np.where(d!=0.)] - NN=d.shape[0] - #print d[0] + print d[np.where(d != 0.)] + NN = d.shape[0] + # print d[0] pass - count+=NN - + count += NN -def LarTPCDataGenerator(files="/data", batchsize=16, datasets=['images3D/C','images3D/V'], Norm=True, - bins=(240,240,256),**kwargs): - +def LarTPCDataGenerator(files="/data", batchsize=16, datasets=['images3D/C', 'images3D/V'], Norm=True, + bins=(240, 240, 256), **kwargs): Samples = [] for F in files: - basename=os.path.basename(F) - ParticleName=basename.split("_")[0] - Samples.append((F,datasets,ParticleName)) + basename = os.path.basename(F) + ParticleName = basename.split("_")[0] + Samples.append((F, datasets, ParticleName)) - #Samples = [ (Directory+"muon_158.2d.h5", datasets, "data")] + # Samples = [ (Directory+"muon_158.2d.h5", datasets, "data")] - def MakeImage(bins,Norm=True): + def MakeImage(bins, Norm=True): def f(D): for i in xrange(D[0].shape[0]): if Norm: - w=np.tanh(np.sign(D[1][i]) * np.log(np.abs(D[1][i]) + 1.0) / 2.0) + w = np.tanh(np.sign(D[1][i]) * np.log(np.abs(D[1][i]) + 1.0) / 2.0) else: - w=D[1][i] - R,b=np.histogramdd(D[0][i], bins=list(bins), weights=w) - return [R]+D[2:] + w = D[1][i] + R, b = np.histogramdd(D[0][i], bins=list(bins), weights=w) + return [R] + D[2:] + return f - - GC= DLMultiClassGenerator(Samples, batchsize=batchsize, - preprocessfunction=MakeImage(bins,False), - OneHot=True, - shapes = [(batchsize,)+bins,(batchsize, 2)], - **kwargs) + GC = DLMultiClassGenerator(Samples, batchsize=batchsize, + preprocessfunction=MakeImage(bins, False), + OneHot=True, + shapes=[(batchsize,) + bins, (batchsize, 2)], + **kwargs) return GC - # -#def LarTPCDataGeneratorOld(Directory="/data/", batchsize=16, datasets=[u'3DImages'],**kwargs): +# def LarTPCDataGeneratorOld(Directory="/data/", batchsize=16, datasets=[u'3DImages'],**kwargs): # # Samples = [ (Directory+"3d/", datasets, "signal" ), # (Directory+"/dnn_NEXT100_Bi214_bg_v2x2x2_r200x200x200.Tensor.h5", datasets, "background" )] @@ -113,9 +110,9 @@ def f(D): -#Test=1 - -#if __name__ == '__main__' and Test==0: +# Test=1 + +# if 
__name__ == '__main__' and Test==0: # import sys # Directory="/data/datasets/LarTPC/apr_9/3d/" # @@ -164,4 +161,4 @@ def f(D): if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/Recon3DModels.py b/Recon3DModels.py index ed2e3b3..0d05760 100644 --- a/Recon3DModels.py +++ b/Recon3DModels.py @@ -3,7 +3,7 @@ from keras.layers.merge import concatenate from keras.models import Sequential, Model from keras.layers.core import Dense, Activation -from keras.layers import BatchNormalization,Dropout,Flatten, Input +from keras.layers import BatchNormalization, Dropout, Flatten, Input from keras.models import model_from_json from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Conv3D, UpSampling3D from keras.models import Model @@ -11,118 +11,118 @@ from keras.callbacks import TensorBoard import numpy as np + class Model2DViewsTo3DDense(ModelWrapper): def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, N_Classes=0, init=0, BatchNormalization=False, Dropout=False, **kwargs): - super(Model2DViewsTo3DDense, self).__init__(Name,**kwargs) - - - self.width=width - self.depth=depth - self.init=init - - self.Dropout=Dropout - self.BatchSize=BatchSize - self.BatchNormalization=BatchNormalization - + super(Model2DViewsTo3DDense, self).__init__(Name, **kwargs) + + self.width = width + self.depth = depth + self.init = init + + self.Dropout = Dropout + self.BatchSize = BatchSize + self.BatchNormalization = BatchNormalization + self.input1_shape = View1Shape self.input2_shape = View2Shape self.N_Classes = N_Classes - - self.MetaData.update({ "width":self.width, - "depth":self.depth, - "Dropout":self.Dropout, - "BatchNormalization":BatchNormalization, - "input1_shape":self.input1_shape, - "input2_shape":self.input2_shape, - "N_classes":self.N_Classes, - "init":self.init}) + + self.MetaData.update({"width": self.width, + "depth": self.depth, + "Dropout": self.Dropout, + "BatchNormalization": BatchNormalization, + "input1_shape": self.input1_shape, + "input2_shape": self.input2_shape, + "N_classes": self.N_Classes, + "init": self.init}) def Build(self): - input1=Input(self.input1_shape) - input2=Input(self.input2_shape) + input1 = Input(self.input1_shape) + input2 = Input(self.input2_shape) input1 = Flatten(input_shape=self.input1_shape)(input1) input2 = Flatten(input_shape=self.input2_shape)(input2) modelT = concatenate([input1, input2]) - - #model.add(Dense(self.width,init=self.init)) + + # model.add(Dense(self.width,init=self.init)) modelT = (Activation('relu')(modelT)) - for i in xrange(0,self.depth): + for i in xrange(0, self.depth): if self.BatchNormalization: - modelT=BatchNormalization()(modelT) + modelT = BatchNormalization()(modelT) - modelT=Dense(self.width,kernel_initializer=self.init)(modelT) - modelT=Activation(self.Activation)(modelT) + modelT = Dense(self.width, kernel_initializer=self.init)(modelT) + modelT = Activation(self.Activation)(modelT) if self.Dropout: - modelT=Dropout(self.Dropout)(modelT) + modelT = Dropout(self.Dropout)(modelT) + + modelT = Dense(self.N_Classes, activation='softmax', kernel_initializer=self.init)(modelT) + + self.Model = Model(input, modelT) - modelT=Dense(self.N_Classes, activation='softmax',kernel_initializer=self.init)(modelT) - - self.Model=Model(input,modelT) -############################################################################################################# class Model2DViewsTo3DConv(ModelWrapper): def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, 
N_Classes=0, init=0, BatchNormalization=False, Dropout=False, **kwargs): - super(Model2DViewsTo3DConv, self).__init__(Name,**kwargs) - - - self.width=width - self.depth=depth - self.init=init - - self.Dropout=Dropout - self.BatchSize=BatchSize - self.BatchNormalization=BatchNormalization - + super(Model2DViewsTo3DConv, self).__init__(Name, **kwargs) + + self.width = width + self.depth = depth + self.init = init + + self.Dropout = Dropout + self.BatchSize = BatchSize + self.BatchNormalization = BatchNormalization + self.input1_shape = View1Shape self.input2_shape = View2Shape self.N_Classes = N_Classes - - self.MetaData.update({ "width":self.width, - "depth":self.depth, - "Dropout":self.Dropout, - "BatchNormalization":BatchNormalization, - "input1_shape":self.input1_shape, - "input2_shape":self.input2_shape, - "N_classes":self.N_Classes, - "init":self.init}) + + self.MetaData.update({"width": self.width, + "depth": self.depth, + "Dropout": self.Dropout, + "BatchNormalization": BatchNormalization, + "input1_shape": self.input1_shape, + "input2_shape": self.input2_shape, + "N_classes": self.N_Classes, + "init": self.init}) + def Build(self): - input1=Input(self.input1_shape) - input2=Input(self.input2_shape) - - input_img = Input(shape=(240,4086, 1)) # adapt this if using `channels_first` image data format - - x = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(input_img) - x = MaxPooling2D((2, 2), padding='same')(x) - x = Conv2D(32, (3, 3), activation='relu', padding='same')(x) - x = MaxPooling2D((2, 2), padding='same')(x) - x = Conv2D(8, (3, 3), activation='relu', padding='same')(x) - encoded1 = MaxPooling2D((2, 2), padding='same')(x) - - y = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(input_img) - y = MaxPooling2D((2, 2), padding='same')(y) - y = Conv2D(32, (3, 3), activation='relu', padding='same')(y) - y = MaxPooling2D((2, 2), padding='same')(y) - y = Conv2D(8, (3, 3), activation='relu', padding='same')(y) - encoded2 = MaxPooling2D((2, 2), padding='same')(y) - - #concatenate images - z = concatenate([encoded1, encoded2]) - - #Now decode in 3D - z = Conv3D(8, (3, 3), activation='relu', padding='same')(z) - z = UpSampling3D((2, 2))(z) - z = Conv3D(32, (3, 3), activation='relu', padding='same')(z) - z = UpSampling3D((2, 2))(z) - z = Conv3D(64, (3, 3), activation='relu')(z) - z = UpSampling3D((2, 2))(z) - decoded = Conv3D(1, (3, 3), activation='sigmoid', padding='same')(z) - - autoencoder = Model(inputs=[input1, input2], outputs=decoded) - autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy') - - self.Model=Model(input,modelT) + input1 = Input(self.input1_shape) + input2 = Input(self.input2_shape) + + input_img = Input(shape=(240, 4086, 1)) # adapt this if using `channels_first` image data format + + x = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(input_img) + x = MaxPooling2D((2, 2), padding='same')(x) + x = Conv2D(32, (3, 3), activation='relu', padding='same')(x) + x = MaxPooling2D((2, 2), padding='same')(x) + x = Conv2D(8, (3, 3), activation='relu', padding='same')(x) + encoded1 = MaxPooling2D((2, 2), padding='same')(x) + + y = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(input_img) + y = MaxPooling2D((2, 2), padding='same')(y) + y = Conv2D(32, (3, 3), activation='relu', padding='same')(y) + y = MaxPooling2D((2, 2), padding='same')(y) + y = Conv2D(8, (3, 3), activation='relu', padding='same')(y) + encoded2 = MaxPooling2D((2, 2), padding='same')(y) + + # concatenate images + z 
= concatenate([encoded1, encoded2]) + + # Now decode in 3D + z = Conv3D(8, (3, 3), activation='relu', padding='same')(z) + z = UpSampling3D((2, 2))(z) + z = Conv3D(32, (3, 3), activation='relu', padding='same')(z) + z = UpSampling3D((2, 2))(z) + z = Conv3D(64, (3, 3), activation='relu')(z) + z = UpSampling3D((2, 2))(z) + decoded = Conv3D(1, (3, 3), activation='sigmoid', padding='same')(z) + + autoencoder = Model(inputs=[input1, input2], outputs=decoded) + autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy') + + self.Model = Model(input, modelT) From 99ea65bc9773ca1425ff14fb78a179b223e971ba Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Thu, 4 May 2017 17:49:31 -0500 Subject: [PATCH 11/22] Fixed convolutional model --- Recon3DModels.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Recon3DModels.py b/Recon3DModels.py index 0d05760..8815dd6 100644 --- a/Recon3DModels.py +++ b/Recon3DModels.py @@ -94,16 +94,14 @@ def Build(self): input1 = Input(self.input1_shape) input2 = Input(self.input2_shape) - input_img = Input(shape=(240, 4086, 1)) # adapt this if using `channels_first` image data format - - x = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(input_img) + x = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(input1) x = MaxPooling2D((2, 2), padding='same')(x) x = Conv2D(32, (3, 3), activation='relu', padding='same')(x) x = MaxPooling2D((2, 2), padding='same')(x) x = Conv2D(8, (3, 3), activation='relu', padding='same')(x) encoded1 = MaxPooling2D((2, 2), padding='same')(x) - y = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(input_img) + y = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(input2) y = MaxPooling2D((2, 2), padding='same')(y) y = Conv2D(32, (3, 3), activation='relu', padding='same')(y) y = MaxPooling2D((2, 2), padding='same')(y) @@ -123,6 +121,5 @@ def Build(self): decoded = Conv3D(1, (3, 3), activation='sigmoid', padding='same')(z) autoencoder = Model(inputs=[input1, input2], outputs=decoded) - autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy') - self.Model = Model(input, modelT) + self.Model = Model(input, autoencoder) From 1bd3e1121be1f574bbae411f22410316a30bbe2b Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Thu, 4 May 2017 18:38:00 -0500 Subject: [PATCH 12/22] Re-add data loading code. Need to have it load 2D training and test, as well as 3D training and test. --- Recon3DExperiment.py | 116 ++++++++++++++++++++++--------------------- 1 file changed, 60 insertions(+), 56 deletions(-) diff --git a/Recon3DExperiment.py b/Recon3DExperiment.py index 8868e6f..3f56e23 100644 --- a/Recon3DExperiment.py +++ b/Recon3DExperiment.py @@ -44,62 +44,66 @@ def TestParamPrime(param, default=False): # Load the Data. TODO from LArTPCDNN.LoadData import * - -# TrainSampleList,TestSampleList=DivideFiles(FileSearch,[float(NSamples)/MaxEvents,float(NTestSamples)/MaxEvents], -# datasetnames=[u'features'], -# Particles=Particles) - -# # Figure out the output shape... This is not necessary. But the automatic mechanism is inefficient. 
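
One structural caveat with the convolutional model patched in above: MaxPooling2D emits rank-4 tensors (batch, height, width, channels), while Conv3D and UpSampling3D require rank-5 input, so the 2D encoder output cannot feed the 3D decoder directly. An explicit Reshape is one way to bridge the two. The sketch below assumes (240, 256, 1) views and illustrative layer widths; it is not the repository's final architecture.

    from keras.layers import Input, Conv2D, MaxPooling2D, Conv3D, UpSampling3D, Reshape
    from keras.layers.merge import concatenate
    from keras.models import Model

    def encode(x):
        # three conv/pool stages: (240, 256, 1) -> (30, 32, 8)
        for nf in (64, 32, 8):
            x = Conv2D(nf, (3, 3), activation='relu', padding='same')(x)
            x = MaxPooling2D((2, 2), padding='same')(x)
        return x

    in1 = Input(shape=(240, 256, 1))
    in2 = Input(shape=(240, 256, 1))
    z = concatenate([encode(in1), encode(in2)])  # rank 4: (30, 32, 16)
    z = Reshape((30, 32, 16, 1))(z)              # rank 5: Conv3D now applies
    for nf in (8, 32):
        z = Conv3D(nf, (3, 3, 3), activation='relu', padding='same')(z)
        z = UpSampling3D((2, 2, 2))(z)           # (30,32,16) -> (120,128,64)
    voxels = Conv3D(1, (3, 3, 3), activation='sigmoid', padding='same')(z)
    model = Model(inputs=[in1, in2], outputs=voxels)
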
-# if ScanWindowSize>0:
-# # shapes=[(BatchSize*multiplier, 2, 240, ScanWindowSize), (BatchSize*multiplier, NClasses)]
-#     shapes=[(BatchSize*multiplier, 240, ScanWindowSize),
-#             (BatchSize*multiplier, 240, ScanWindowSize),
-#             (BatchSize*multiplier, NClasses)]
-#     viewshape=(None, 240, ScanWindowSize)
-# else:
-#     shapes=[(BatchSize*multiplier, 240, 4096/DownSampleSize),
-#             (BatchSize*multiplier, 240, 4096/DownSampleSize),
-#             (BatchSize*multiplier, NClasses)]
-
-#     viewshape=(None, 240, 4096/DownSampleSize)
-
-# def MakeGenerator(SampleList,NSamples,
-#                   cachefile="LArIAT-LoadDataTest-Cache.h5",**kwargs):
-
-#     return DLMultiClassFilterGenerator(TrainSampleList, FilterEnergy(EnergyCut), max=NSamples,
-#                                        preprocessfunction=ProcessWireData(DownSampleSize,ScanWindowSize,Normalize),
-#                                        postprocessfunction=MergeInputs(),
-#                                        batchsize=BatchSize,
-#                                        shapes=shapes,
-#                                        n_threads=n_threads,
-#                                        multiplier=multiplier,
-#                                        cachefile=cachefile,
-#                                        **kwargs)
-
-# # Use DLGenerators to read data
-# Train_genC = MakeGenerator(TrainSampleList, NSamples,
-#                            cachefile="/tmp/LArTPCDNN-LArIAT-TrainEvent-Cache.h5")
-
-# Test_genC = MakeGenerator(TestSampleList, NTestSamples,
-#                           cachefile="/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5")
-
-# print "Train Class Index Map:", Train_genC.ClassIndexMap
-# #print "Test Class Index Map:", Test_genC.ClassIndexMap
-
-# Cache=True
-
-# if Preload:
-#     print "Caching data in memory for faster processing after first epoch. Hope you have enough memory."
-#     Train_gen=Train_genC.PreloadGenerator()
-#     Test_gen=Test_genC.PreloadGenerator()
-# elif Cache:
-#     print "Caching data on disk for faster processing after first epoch. Hope you have enough disk space."
-#     Train_gen=Train_genC.DiskCacheGenerator(n_threads_cache)
-#     Test_gen=Test_genC.DiskCacheGenerator(n_threads_cache)
-# else:
-#     Train_gen=Train_genC.Generator()
-#     Test_gen=Test_genC.Generator()
-
+from LArTPCDNN.Recon3DLoadData import *
+
+FileSearch = "h5/*.h5"
+
+TrainSampleList, TestSampleList = DivideFiles(FileSearch,
+                                              [float(NSamples) / MaxEvents, float(NTestSamples) / MaxEvents],
+                                              datasetnames=[u'features'],
+                                              Particles=Particles)
+
+# Figure out the output shape... This is not necessary. But the automatic mechanism is inefficient.
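
For instance, with DownSampleSize = 8 and ScanWindowSize = 256 (the values later patches in this series settle on), each wire-plane view is 240 wires by 512 downsampled ticks, cropped to 256; the snippet only restates the arithmetic behind the shapes spelled out next.

    DownSampleSize = 8
    ScanWindowSize = 256
    ticks = 4096 // DownSampleSize          # 512 time ticks after downsampling
    view = (240, ScanWindowSize or ticks)   # (240, 256) wires x ticks per plane
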
+if ScanWindowSize > 0: + # shapes=[(BatchSize*multiplier, 2, 240, ScanWindowSize), (BatchSize*multiplier, NClasses)] + shapes = [(BatchSize * multiplier, 240, ScanWindowSize), + (BatchSize * multiplier, 240, ScanWindowSize), + (BatchSize * multiplier, NClasses)] + viewshape = (None, 240, ScanWindowSize) +else: + shapes = [(BatchSize * multiplier, 240, 4096 / DownSampleSize), + (BatchSize * multiplier, 240, 4096 / DownSampleSize), + (BatchSize * multiplier, NClasses)] + +viewshape = (None, 240, 4096 / DownSampleSize) + + +def MakeGenerator(SampleList, NSamples, + cachefile="LArIAT-LoadDataTest-Cache.h5", **kwargs): + return DLMultiClassFilterGenerator(TrainSampleList, FilterEnergy(EnergyCut), max=NSamples, + preprocessfunction=ProcessWireData(DownSampleSize, ScanWindowSize, Normalize), + postprocessfunction=MergeInputs(), + batchsize=BatchSize, + shapes=shapes, + n_threads=n_threads, + multiplier=multiplier, + cachefile=cachefile, + **kwargs) + + +# Use DLGenerators to read data +Train_genC = MakeGenerator(TrainSampleList, NSamples, + cachefile="/tmp/LArTPCDNN-LArIAT-TrainEvent-Cache.h5") + +Test_genC = MakeGenerator(TestSampleList, NTestSamples, + cachefile="/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") + +print "Train Class Index Map:", Train_genC.ClassIndexMap +# print "Test Class Index Map:", Test_genC.ClassIndexMap + +Cache = True + +if Preload: + print "Caching data in memory for faster processing after first epoch. Hope you have enough memory." + Train_gen = Train_genC.PreloadGenerator() + Test_gen = Test_genC.PreloadGenerator() +elif Cache: + print "Caching data on disk for faster processing after first epoch. Hope you have enough disk space." + Train_gen = Train_genC.DiskCacheGenerator(n_threads_cache) + Test_gen = Test_genC.DiskCacheGenerator(n_threads_cache) +else: + Train_gen = Train_genC.Generator() + Test_gen = Test_genC.Generator() # Build/Load the Model from DLTools.ModelWrapper import ModelWrapper From f98a9d6b698a444befdb342f23aa629420964b03 Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Fri, 5 May 2017 14:38:46 -0500 Subject: [PATCH 13/22] A bit more progress --- Recon3DExperiment.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/Recon3DExperiment.py b/Recon3DExperiment.py index 3f56e23..cc68c45 100644 --- a/Recon3DExperiment.py +++ b/Recon3DExperiment.py @@ -46,24 +46,17 @@ def TestParamPrime(param, default=False): from LArTPCDNN.LoadData import * from LArTPCDNN.Recon3DLoadData import * -FileSearch = "h5/*.h5" +FileSearch = "apr_9/2d/muon*.h5" TrainSampleList, TestSampleList = DivideFiles(FileSearch, [float(NSamples) / MaxEvents, float(NTestSamples) / MaxEvents], datasetnames=[u'features'], Particles=Particles) +bins3d=(240, 240, 256) -# Figure out the output shape... This is not necessary. But the automatic mechanism is inefficient. 
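
The Preload/Cache branches restored above trade memory or disk for speed from the second epoch onward. PreloadGenerator and DiskCacheGenerator belong to DLTools; stripped to its essence, the in-memory variant behaves like this sketch (caching_generator is an illustrative name, not the DLTools API).

    def caching_generator(gen, n_batches):
        """Pull n_batches from gen once, then replay the cached batches forever."""
        cache = [next(gen) for _ in range(n_batches)]
        while True:
            for batch in cache:
                yield batch
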
-if ScanWindowSize > 0: - # shapes=[(BatchSize*multiplier, 2, 240, ScanWindowSize), (BatchSize*multiplier, NClasses)] - shapes = [(BatchSize * multiplier, 240, ScanWindowSize), - (BatchSize * multiplier, 240, ScanWindowSize), - (BatchSize * multiplier, NClasses)] - viewshape = (None, 240, ScanWindowSize) -else: - shapes = [(BatchSize * multiplier, 240, 4096 / DownSampleSize), - (BatchSize * multiplier, 240, 4096 / DownSampleSize), - (BatchSize * multiplier, NClasses)] +shapes = [(BatchSize * multiplier, 240, 4096 / DownSampleSize), + (BatchSize * multiplier, 240, 4096 / DownSampleSize), + (BatchSize * multiplier,) + bins3d] viewshape = (None, 240, 4096 / DownSampleSize) From dd906cc5abd93796d9013c4814a17283a12bf083 Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Sat, 6 May 2017 19:36:32 -0500 Subject: [PATCH 14/22] Use cluster path --- Recon3DLoadData.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index cdb5015..bd73b0b 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -6,7 +6,7 @@ def main(): - datapath = "/data/datasets/LarTPC/apr_9/" + datapath = "/data/cloud/project/data/apr_9/" # Pull in datafiles filelist2d = glob.glob(datapath + "2d/*") From 099dc43d66f6b0b13982cdb75c096606976ac641 Mon Sep 17 00:00:00 2001 From: cloudy Date: Sat, 6 May 2017 22:31:11 -0500 Subject: [PATCH 15/22] Generated merged --- .gitignore | 1 + LoadData.py | 4 +- Recon3DExperiment.py | 76 ++++++----- Recon3DLoadData.py | 315 ++++++++++++++++++++++++++++++------------- 4 files changed, 268 insertions(+), 128 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/LoadData.py b/LoadData.py index 0b136f7..3372867 100644 --- a/LoadData.py +++ b/LoadData.py @@ -188,7 +188,7 @@ def DivideFiles(FileSearch="/data/LArIAT/h5_files/*.h5",Fractions=[.9,.1],datase #FileSearch="/data/LArIAT/h5_files/*.h5" # CHANGE TO /data/cloud/project/data/apr_9/2d/*.h5 - FileSearch="/data/datasets/LarTPC/apr_9/2d/*.h5" + FileSearch="/data/cloud/project/data/apr_9/2d/*.h5" try: n_threads=int(sys.argv[1]) except: @@ -218,7 +218,7 @@ def DivideFiles(FileSearch="/data/LArIAT/h5_files/*.h5",Fractions=[.9,.1],datase ScanWindowSize=ScanWindowSize, Norm=Normalize, #shapes=[(128*m, 2, 240, 4096/DownSampleSize), (128*m, 16)], - shapes=[(128*m, 240, ScanWindowSize), (128, 240, 256)], + #shapes=[(128*m, 240, ScanWindowSize), (128, 240, 256)], n_threads=n_threads, SharedDataQueueSize=1, multiplier=m, diff --git a/Recon3DExperiment.py b/Recon3DExperiment.py index cc68c45..9091f1c 100644 --- a/Recon3DExperiment.py +++ b/Recon3DExperiment.py @@ -46,41 +46,47 @@ def TestParamPrime(param, default=False): from LArTPCDNN.LoadData import * from LArTPCDNN.Recon3DLoadData import * -FileSearch = "apr_9/2d/muon*.h5" - -TrainSampleList, TestSampleList = DivideFiles(FileSearch, - [float(NSamples) / MaxEvents, float(NTestSamples) / MaxEvents], - datasetnames=[u'features'], - Particles=Particles) -bins3d=(240, 240, 256) - -shapes = [(BatchSize * multiplier, 240, 4096 / DownSampleSize), - (BatchSize * multiplier, 240, 4096 / DownSampleSize), - (BatchSize * multiplier,) + bins3d] - -viewshape = (None, 240, 4096 / DownSampleSize) - - -def MakeGenerator(SampleList, NSamples, - cachefile="LArIAT-LoadDataTest-Cache.h5", **kwargs): - return DLMultiClassFilterGenerator(TrainSampleList, FilterEnergy(EnergyCut), max=NSamples, - 
preprocessfunction=ProcessWireData(DownSampleSize, ScanWindowSize, Normalize), - postprocessfunction=MergeInputs(), - batchsize=BatchSize, - shapes=shapes, - n_threads=n_threads, - multiplier=multiplier, - cachefile=cachefile, - **kwargs) - - -# Use DLGenerators to read data -Train_genC = MakeGenerator(TrainSampleList, NSamples, - cachefile="/tmp/LArTPCDNN-LArIAT-TrainEvent-Cache.h5") - -Test_genC = MakeGenerator(TestSampleList, NTestSamples, - cachefile="/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") - +print "Loading 2D and 3D data" + +Train2D, Train3D = combined2D3DGenerator() + +print "Testing 2D and 3D data load" + +#FileSearch = "apr_9/2d/muon*.h5" +# +#TrainSampleList, TestSampleList = DivideFiles(FileSearch, +# [float(NSamples) / MaxEvents, float(NTestSamples) / MaxEvents], +# datasetnames=[u'features'], +# Particles=Particles) +#bins3d=(240, 240, 256) +# +#shapes = [(BatchSize * multiplier, 240, 4096 / DownSampleSize), +# (BatchSize * multiplier, 240, 4096 / DownSampleSize), +# (BatchSize * multiplier,) + bins3d] +# +#viewshape = (None, 240, 4096 / DownSampleSize) +# +# +#def MakeGenerator(SampleList, NSamples, +# cachefile="LArIAT-LoadDataTest-Cache.h5", **kwargs): +# return DLMultiClassFilterGenerator(TrainSampleList, FilterEnergy(EnergyCut), max=NSamples, +# preprocessfunction=ProcessWireData(DownSampleSize, ScanWindowSize, Normalize), +# postprocessfunction=MergeInputs(), +# batchsize=BatchSize, +# shapes=shapes, +# n_threads=n_threads, +# multiplier=multiplier, +# cachefile=cachefile, +# **kwargs) +# +# +## Use DLGenerators to read data +#Train_genC = MakeGenerator(TrainSampleList, NSamples, +# cachefile="/tmp/LArTPCDNN-LArIAT-TrainEvent-Cache.h5") +# +#Test_genC = MakeGenerator(TestSampleList, NTestSamples, +# cachefile="/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") +# print "Train Class Index Map:", Train_genC.ClassIndexMap # print "Test Class Index Map:", Test_genC.ClassIndexMap diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index cdb5015..eb4f9ae 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -5,9 +5,71 @@ from DLTools.ThreadedGenerator import DLMultiClassGenerator, DLMultiClassFilterGenerator -def main(): - datapath = "/data/datasets/LarTPC/apr_9/" +def combined2D3DGenerator(): + #datapath = "/data/datasets/LarTPC/apr_9/" + datapath = "/data/cloud/project/data/apr_9/" + # Pull in datafiles + filelist2d = glob.glob(datapath + "2d/*") + filelist3d = glob.glob(datapath + "3d/*") + filelist2d.sort() + filelist3d.sort() + assert len(filelist2d) == len(filelist3d), "Number of 2D and 3D files mismatch!" 
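
The assert above only compares counts; the 2D/3D pairing additionally relies on the two sorted listings lining up event by event. Assuming the muon_158.2d.h5 / muon_158.3d.h5 naming seen elsewhere in this file (the suffix pattern is an assumption), a stricter check costs little:

    import os

    def check_pairing(filelist2d, filelist3d):
        # Compare event stems, e.g. "muon_158.2d.h5" <-> "muon_158.3d.h5".
        for f2, f3 in zip(filelist2d, filelist3d):
            stem2 = os.path.basename(f2).replace(".2d.h5", "")
            stem3 = os.path.basename(f3).replace(".3d.h5", "")
            assert stem2 == stem3, "Unpaired files: %s vs %s" % (f2, f3)
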
+ + try: + n_threads = int(sys.argv[1]) + except: + n_threads = 6 + + try: + n_threads2 = int(sys.argv[2]) + except: + n_threads2 = n_threads + + + Train_gen3D = LarTPCDataGenerator(filelist3d, n_threads=n_threads, max=100000, + bins=(240, 240, 256), verbose=False) + + DownSampleSize=8 + ScanWindowSize=256 + Normalize=True + closefiles=False + m = 1 + Train_gen2D =LArIATDataGenerator(FileSearch=datapath + "2d/*", + max=128*10000, + batchsize=128, + DownSampleSize=DownSampleSize, + ScanWindowSize=ScanWindowSize, + Norm=Normalize, + #shapes=[(128*m, 2, 240, 4096/DownSampleSize), (128*m, 16)], + #shapes=[(128*m, 240, ScanWindowSize), (128, 240, 256)], + #shapes=[(128*m, 2, 240, ScanWindowSize), (128*m, 16)], + #shapes=[(128*m, 240, ScanWindowSize)], + n_threads=n_threads, + SharedDataQueueSize=1, + multiplier=m, + closefiles=closefiles, + verbose=False, + timing=False, + sleep=1, + Wrap=False) + def MergerGenerator(T2D, T3D): + while True: + s2d = T2D.Generator().next() + s3d = T3D.Generator().next() + + if not T2D or not T3D: + break + + yield s2d, s3d + + return MergerGenerator(Train_gen2D, Train_gen3D) + + + +def main(): + #datapath = "/data/datasets/LarTPC/apr_9/" + datapath = "/data/cloud/project/data/apr_9/" # Pull in datafiles filelist2d = glob.glob(datapath + "2d/*") filelist3d = glob.glob(datapath + "3d/*") @@ -25,9 +87,16 @@ def main(): except: n_threads2 = n_threads - Train_gen = LarTPCDataGenerator(filelist3d, n_threads=n_threads, max=100000, + + Train_gen = LarTPCDataGenerator(filelist3d, n_threads=n_threads, max=100000, bins=(240, 240, 256), verbose=False) + DownSampleSize=8 + ScanWindowSize=256 + Normalize=True + closefiles=False + m = 1 + print "Generator Ready" print "ClassIndex:", Train_gen.ClassIndexMap print "Object Shape:", Train_gen.shapes @@ -61,8 +130,45 @@ def main(): count += NN -def LarTPCDataGenerator(files="/data", batchsize=16, datasets=['images3D/C', 'images3D/V'], Norm=True, - bins=(240, 240, 256), **kwargs): +def LArIATDataGenerator(FileSearch="/data/LArIAT/*.h5",DownSampleSize=4, ScanWindowSize=256,EnergyCut=0.61, + datasetnames=[u'features'], Norm=False, MaxFiles=-1, **kwargs): + + print "Searching in :",FileSearch + Files = glob.glob(FileSearch) + + print "Found",len(Files),"files." 
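
Two notes on the MergerGenerator defined above: the `if not T2D or not T3D` test can never fire, because generator objects are always truthy in Python, and calling T2D.Generator().next() inside the loop appears to construct a fresh generator on every pass, so both streams keep re-reading their first batch (patch 21 below creates each generator once and advances it, which is the usual fix). With the standard library the same pairing can be written directly:

    from itertools import izip  # Python 2; plain zip in Python 3

    def merger_generator(gen2d, gen3d):
        # Pair one 2D batch with one 3D batch; ends when either is exhausted.
        for s2d, s3d in izip(gen2d, gen3d):
            yield s2d, s3d
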
+ Files.sort() + + if MaxFiles!=-1: + random.shuffle(Files) + Files=Files[:MaxFiles] + + Samples=[] + + FileCount=0 + + for F in Files: + FileCount+=1 + basename=os.path.basename(F) + ParticleName=basename.split("_")[0] + + Samples.append((F,datasetnames,ParticleName)) + if MaxFiles>0: + if FileCount>MaxFiles: + break + + GC= DLMultiClassFilterGenerator(Samples, FilterEnergy(EnergyCut), + preprocessfunction=ProcessWireData(DownSampleSize,ScanWindowSize,Norm), + **kwargs) + return GC + + + + + + +def LarTPCDataGenerator(files="/data", is2D = False, batchsize=16, datasets=['images3D/C', 'images3D/V'], Norm=True, + bins=None, EnergyCut=0.61, DownSampleSize = 2, ScanWindowSize = 256, **kwargs): Samples = [] for F in files: @@ -73,92 +179,119 @@ def LarTPCDataGenerator(files="/data", batchsize=16, datasets=['images3D/C', 'im # Samples = [ (Directory+"muon_158.2d.h5", datasets, "data")] def MakeImage(bins, Norm=True): - def f(D): - for i in xrange(D[0].shape[0]): - if Norm: - w = np.tanh(np.sign(D[1][i]) * np.log(np.abs(D[1][i]) + 1.0) / 2.0) - else: - w = D[1][i] - R, b = np.histogramdd(D[0][i], bins=list(bins), weights=w) - return [R] + D[2:] - - return f - - GC = DLMultiClassGenerator(Samples, batchsize=batchsize, - preprocessfunction=MakeImage(bins, False), - OneHot=True, - shapes=[(batchsize,) + bins, (batchsize, 2)], - **kwargs) + if bins != None: + def f(D): + for i in xrange(D[0].shape[0]): + if Norm: + w = np.tanh(np.sign(D[1][i]) * np.log(np.abs(D[1][i]) + 1.0) / 2.0) + else: + w = D[1][i] + R, b = np.histogramdd(D[0][i], bins=list(bins), weights=w) + return [R] + D[2:] + return f + else: + return False + + if bins == None: + bins = (0,) + + if is2D: + GC= DLMultiClassFilterGenerator(Samples, FilterEnergy(EnergyCut), + preprocessfunction=ProcessWireData(DownSampleSize,ScanWindowSize,Norm), + **kwargs) + + else: + GC = DLMultiClassGenerator(Samples, batchsize=batchsize, + preprocessfunction=MakeImage(bins, False), + OneHot=True, + shapes=[(batchsize,) + bins, (batchsize, 2)], + **kwargs) + return GC -# -# def LarTPCDataGeneratorOld(Directory="/data/", batchsize=16, datasets=[u'3DImages'],**kwargs): -# -# Samples = [ (Directory+"3d/", datasets, "signal" ), -# (Directory+"/dnn_NEXT100_Bi214_bg_v2x2x2_r200x200x200.Tensor.h5", datasets, "background" )] -# -# def filterfunction(batchdict): -# r= np.array(range(batchdict["3DImages"].shape[0])) -# return r[0] -# -# -# GC= DLMultiClassFilterGenerator(Samples, batchsize=batchsize, FilterFunc=False, -# OneHot=True, shapes = [(batchsize, 200,200,200), (batchsize, 2)], **kwargs) -# return GC - - - - -# Test=1 - -# if __name__ == '__main__' and Test==0: -# import sys -# Directory="/data/datasets/LarTPC/apr_9/3d/" -# -# try: -# n_threads=int(sys.argv[1]) -# except: -# n_threads=6 -# -# try: -# n_threads2=int(sys.argv[2]) -# except: -# n_threads2=n_threads -# -# Train_gen=LarTPCDataGeneratorOld(Directory,n_threads=n_threads,max=100000, verbose=False) -# -# print "Generator Ready" -# print "ClassIndex:", Train_gen.ClassIndexMap -# print "Object Shape:",Train_gen.shapes -# sys.stdout.flush() -# -# N=1 -# NN=n_threads -# count=0 -# old=start=time.time() -# for tries in xrange(1): -# print "*********************Try:",tries -# #for D in Train_gen.Generator(): -# for D in Train_gen.Generator(): -# NN-=0 -# if NN<0: -# break -# start1=time.time() -# Delta=(start1-start) -# Delta2=(start1-old) -# old=start1 -# print count,":",Delta, ":",Delta/float(N), Delta2 -# sys.stdout.flush() -# N+=1 -# for d in D: -# print d.shape -# #print d[np.where(d!=0.)] -# 
NN=d.shape[0] -# #print d[0] -# pass -# count+=NN - - -if __name__ == '__main__': - main() +def FilterEnergy(MinEnergy): + def filterfunction(batchdict): + r= np.where(np.array(batchdict['Eng']) > MinEnergy) + return r[0] + + return filterfunction + +def ProcessWireData(DownSampleFactor,ScanWindowSize,Norm=True): + def processfunction(D): + X=D[0] + BatchSize=X.shape[0] + if DownSampleFactor > 1: + X,Ny= DownSample(X,DownSampleFactor,BatchSize) + if ScanWindowSize>0: + #X,i,j=ScanWindow(X,ScanWindowSize,240,Ny) + X=crop_batch(X,ScanWindowSize) + + if Norm: + X = np.tanh(np.sign(X) * np.log(np.abs(X) + 1.0) / 2.0) + return [X[:,0,:,:],X[:,1,:,:]] +D[1:] + return processfunction + +# From Peter Sadowski +def crop_example(X, interval, augment=None): + ''' + Crop X by finding time interval with maximal energy. + X = tensor of shape (num_channel, x, y) = (2 channels, 240 wires, time steps) + interval = length of desired time step window + augment = If integer, randomly translate the time window up to this many steps. + ''' + assert len(X.shape) == 3, "Example is expected to be three-dimensional." + energy = np.sum(X, axis=(0,1)) + assert energy.ndim == 1 + cumsum = np.cumsum(energy, dtype='float64') + assert not np.any(np.isnan(cumsum)) + assert np.all(np.isfinite(cumsum)) + intsum = cumsum[interval:] - cumsum[:-interval] + maxstart = np.argmax(intsum) # NOTE: maxend=interval+np.argmax(intsum) + + if augment: + rsteps = np.random.random_integers(-augment, augment) + if rsteps < 0: + maxstart = max(0, maxstart + rsteps) + else: + maxstart = min(len(energy)-interval, maxstart + rsteps) + + return X[:, :, maxstart:maxstart+interval] + +def crop_batch(X, interval, augment=None): + new_X = np.zeros(shape=(X.shape[0],X.shape[1],X.shape[2],interval), dtype='float32') + for i in range(X.shape[0]): + new_X[i,:,:,:] = crop_example(X[i,:,:,:], interval, augment) + return new_X + + +def shuffle_in_unison_inplace(a, b, c=False): + assert len(a) == len(b) + p = np.random.permutation(len(a)) + if type(c) != bool: + return a[p], b[p], c[p] + return a[p], b[p] + +def DownSample(y,factor,batchsize,sumabs=False): + Nx=batchsize + Ny=y.shape[1] + Nz=y.shape[2] + Nw=y.shape[3] + + if factor==0: + return np.reshape(y,[Nx,Ny,Nz,Nw]),Nw + # Remove entries at the end so Down Sampling works + NwNew=Nw-Nw%factor + features1=np.reshape(y,[Nx,Ny,Nz,Nw])[:,:,:,0:NwNew] + # DownSample + if sumabs: + features_Down=abs(features1.reshape([Nz*NwNew/factor,factor])).sum(axis=3).reshape([Nx,Ny,Nz,NwNew/factor]) + else: + features_Down=features1.reshape([Nx,Ny,Nz*NwNew/factor,factor]).sum(axis=3).reshape([Nx,Ny,Nz,NwNew/factor]) + return features_Down, NwNew + + + + +#if __name__ == '__main__': +# main() From eec60521a602d050ebf3e7b5ef031830d31eefb9 Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Tue, 9 May 2017 15:01:11 -0500 Subject: [PATCH 16/22] Fixed merger histogram code --- Recon3DLoadData.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index 60cca12..1a97270 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -57,12 +57,11 @@ def MergerGenerator(T2D, T3D): while True: s2d = T2D.Generator().next() s3d = T3D.Generator().next() - - if not T2D or not T3D: + if s2d and s3d: + yield s2d, s3d + else: break - yield s2d, s3d - return MergerGenerator(Train_gen2D, Train_gen3D) From cce3a33a01c3c8bbf742a0f9c79b9de6787511d2 Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Tue, 9 May 2017 16:40:02 -0500 Subject: [PATCH 17/22] Models progress --- 
Recon3DExperiment.py | 36 ++++++++++++++++++------------------ Recon3DModels.py | 36 +++++++++++++++++------------------- 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/Recon3DExperiment.py b/Recon3DExperiment.py index 9091f1c..9338c0d 100644 --- a/Recon3DExperiment.py +++ b/Recon3DExperiment.py @@ -48,7 +48,7 @@ def TestParamPrime(param, default=False): print "Loading 2D and 3D data" -Train2D, Train3D = combined2D3DGenerator() +#Train_gen = combined2D3DGenerator() print "Testing 2D and 3D data load" @@ -87,26 +87,26 @@ def TestParamPrime(param, default=False): #Test_genC = MakeGenerator(TestSampleList, NTestSamples, # cachefile="/tmp/LArTPCDNN-LArIAT-TestEvent-Cache.h5") # -print "Train Class Index Map:", Train_genC.ClassIndexMap +#print "Train Class Index Map:", Train_genC.ClassIndexMap # print "Test Class Index Map:", Test_genC.ClassIndexMap -Cache = True +#Cache = True -if Preload: - print "Caching data in memory for faster processing after first epoch. Hope you have enough memory." - Train_gen = Train_genC.PreloadGenerator() - Test_gen = Test_genC.PreloadGenerator() -elif Cache: - print "Caching data on disk for faster processing after first epoch. Hope you have enough disk space." - Train_gen = Train_genC.DiskCacheGenerator(n_threads_cache) - Test_gen = Test_genC.DiskCacheGenerator(n_threads_cache) -else: - Train_gen = Train_genC.Generator() - Test_gen = Test_genC.Generator() +#if Preload: +# print "Caching data in memory for faster processing after first epoch. Hope you have enough memory." +# Train_gen = Train_genC.PreloadGenerator() +# Test_gen = Test_genC.PreloadGenerator() +#elif Cache: +# print "Caching data on disk for faster processing after first epoch. Hope you have enough disk space." +# Train_gen = Train_genC.DiskCacheGenerator(n_threads_cache) +# Test_gen = Test_genC.DiskCacheGenerator(n_threads_cache) +#else: +# Train_gen = Train_genC.Generator() +# Test_gen = Test_genC.Generator() # Build/Load the Model from DLTools.ModelWrapper import ModelWrapper -from LArTPCDNN.Models import * +from LArTPCDNN.Recon3DModels import * # You can automatically load the latest previous training of this model. 
if TestDefaultParam("LoadPreviousModel") and not LoadModel: @@ -132,10 +132,10 @@ def TestParamPrime(param, default=False): print "Building Model...", - View1Shape = (240, 4096) - View2Shape = (240, 4096) + View1Shape = (240, 4096, 2) + View2Shape = (240, 4096, 2) - ReconstructionModel = Model2DViewsTo3D(Name, View1Shape, View2Shape, Width, Depth, + ReconstructionModel = Model2DViewsTo3DConv(Name, View1Shape, View2Shape, Width, Depth, BatchSize, NClasses, init=TestDefaultParam("WeightInitialization", 'normal'), # activation=TestDefaultParam("activation","relu"), diff --git a/Recon3DModels.py b/Recon3DModels.py index 8815dd6..79a31d5 100644 --- a/Recon3DModels.py +++ b/Recon3DModels.py @@ -15,7 +15,7 @@ class Model2DViewsTo3DDense(ModelWrapper): def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, N_Classes=0, init=0, BatchNormalization=False, Dropout=False, **kwargs): - super(Model2DViewsTo3DDense, self).__init__(Name, **kwargs) + super(Model2DViewsTo3DDense, self).__init__(Name, Loss="categorical_crossentropy", **kwargs) self.width = width self.depth = depth @@ -41,9 +41,9 @@ def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=204 def Build(self): input1 = Input(self.input1_shape) input2 = Input(self.input2_shape) - input1 = Flatten(input_shape=self.input1_shape)(input1) - input2 = Flatten(input_shape=self.input2_shape)(input2) - modelT = concatenate([input1, input2]) + flat1 = Flatten(input_shape=self.input1_shape)(input1) + flat2 = Flatten(input_shape=self.input2_shape)(input2) + modelT = concatenate([flat1, flat2]) # model.add(Dense(self.width,init=self.init)) modelT = (Activation('relu')(modelT)) @@ -53,21 +53,21 @@ def Build(self): modelT = BatchNormalization()(modelT) modelT = Dense(self.width, kernel_initializer=self.init)(modelT) - modelT = Activation(self.Activation)(modelT) + modelT = Activation("softmax")(modelT) if self.Dropout: modelT = Dropout(self.Dropout)(modelT) modelT = Dense(self.N_Classes, activation='softmax', kernel_initializer=self.init)(modelT) - self.Model = Model(input, modelT) + self.Model = Model(inputs=[input1, input2], outputs=modelT) class Model2DViewsTo3DConv(ModelWrapper): def __init__(self, Name, View1Shape, View2Shape, width=0, depth=0, BatchSize=2048, N_Classes=0, init=0, BatchNormalization=False, Dropout=False, **kwargs): - super(Model2DViewsTo3DConv, self).__init__(Name, **kwargs) + super(Model2DViewsTo3DConv, self).__init__(Name, Loss="categorical_crossentropy", **kwargs) self.width = width self.depth = depth @@ -109,17 +109,15 @@ def Build(self): encoded2 = MaxPooling2D((2, 2), padding='same')(y) # concatenate images - z = concatenate([encoded1, encoded2]) + #z = concatenate([encoded1, encoded2]) # Now decode in 3D - z = Conv3D(8, (3, 3), activation='relu', padding='same')(z) - z = UpSampling3D((2, 2))(z) - z = Conv3D(32, (3, 3), activation='relu', padding='same')(z) - z = UpSampling3D((2, 2))(z) - z = Conv3D(64, (3, 3), activation='relu')(z) - z = UpSampling3D((2, 2))(z) - decoded = Conv3D(1, (3, 3), activation='sigmoid', padding='same')(z) - - autoencoder = Model(inputs=[input1, input2], outputs=decoded) - - self.Model = Model(input, autoencoder) + z = Conv3D(8, (3, 3, 3), activation='relu', padding='same')(encoded1 + encoded2) + z = UpSampling3D((2, 2, 2))(z) + z = Conv3D(32, (3, 3, 3), activation='relu', padding='same')(z) + z = UpSampling3D((2, 2, 2))(z) + z = Conv3D(64, (3, 3, 3), activation='relu')(z) + z = UpSampling3D((2, 2, 2))(z) + decoded = Conv3D(1, (3, 3, 3), activation='sigmoid', 
padding='same')(z) + + self.Model = Model(inputs=[input1, input2], outputs=decoded) From 391bdc49278f6012e1ff1438ab2af6e101dea6e1 Mon Sep 17 00:00:00 2001 From: thegrb93 Date: Thu, 11 May 2017 09:09:36 -0500 Subject: [PATCH 18/22] Have the generator yield data correctly --- Recon3DLoadData.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index 1a97270..3088651 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -58,7 +58,7 @@ def MergerGenerator(T2D, T3D): s2d = T2D.Generator().next() s3d = T3D.Generator().next() if s2d and s3d: - yield s2d, s3d + yield [s2d[0], s2d[1], s3d[0]] else: break From fd391b02e05591bf11be862b91b30ec5982a3565 Mon Sep 17 00:00:00 2001 From: Garrett Brown Date: Thu, 11 May 2017 15:56:40 -0500 Subject: [PATCH 19/22] Some more fixes --- Recon3DExperiment.py | 18 +++++++++--------- Recon3DLoadData.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Recon3DExperiment.py b/Recon3DExperiment.py index 9338c0d..ee4e772 100644 --- a/Recon3DExperiment.py +++ b/Recon3DExperiment.py @@ -48,7 +48,7 @@ def TestParamPrime(param, default=False): print "Loading 2D and 3D data" -#Train_gen = combined2D3DGenerator() +Train_gen = combined2D3DGenerator() print "Testing 2D and 3D data load" @@ -132,10 +132,10 @@ def TestParamPrime(param, default=False): print "Building Model...", - View1Shape = (240, 4096, 2) - View2Shape = (240, 4096, 2) + View1Shape = (240, 256) + View2Shape = (240, 256) - ReconstructionModel = Model2DViewsTo3DConv(Name, View1Shape, View2Shape, Width, Depth, + ReconstructionModel = Model2DViewsTo3DDense(Name, View1Shape, View2Shape, Width, Depth, BatchSize, NClasses, init=TestDefaultParam("WeightInitialization", 'normal'), # activation=TestDefaultParam("activation","relu"), @@ -205,17 +205,17 @@ def TestParamPrime(param, default=False): verbose = 1 # Set to 2 print "Evaluating score on test sample..." 
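
On the yield shape patched in above: fit_generator consumes (inputs, targets) tuples, where inputs is a list when the model has multiple Input layers, so a flat three-element list is not yet what Keras expects; patch 21 below moves to that contract. A hedged sketch of where the series lands (the flattening matches the 240*240*256 target used there; batch handling is illustrative):

    def training_stream(gen2d, gen3d, voxels=(240, 240, 256)):
        n = voxels[0] * voxels[1] * voxels[2]
        while True:
            s2d = next(gen2d)
            s3d = next(gen3d)
            x = [s2d[0], s2d[1]]                      # the two wire-plane views
            y = s3d[0].reshape((s3d[0].shape[0], n))  # flatten voxels per event
            yield (x, y)
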
- score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples / BatchSize) + #score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples / BatchSize) - print "Initial Score:", score - ReconstructionModel.MetaData["InitialScore"] = score + #print "Initial Score:", score + #ReconstructionModel.MetaData["InitialScore"] = score ReconstructionModel.History = ReconstructionModel.Model.fit_generator(Train_gen, steps_per_epoch=(NSamples / BatchSize), epochs=Epochs, verbose=verbose, - validation_data=Test_gen, - validation_steps=NTestSamples / BatchSize, + #validation_data=Test_gen, + #validation_steps=NTestSamples / BatchSize, callbacks=callbacks) score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples / BatchSize) diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index 3088651..b29e0c7 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -166,7 +166,7 @@ def LArIATDataGenerator(FileSearch="/data/LArIAT/*.h5",DownSampleSize=4, ScanWin -def LarTPCDataGenerator(files="/data", is2D = False, batchsize=16, datasets=['images3D/C', 'images3D/V'], Norm=True, +def LarTPCDataGenerator(files="/data", is2D = False, batchsize=128, datasets=['images3D/C', 'images3D/V'], Norm=True, bins=None, EnergyCut=0.61, DownSampleSize = 2, ScanWindowSize = 256, **kwargs): Samples = [] From 6f9b327f358dd239902eb53d97f53920529ff193 Mon Sep 17 00:00:00 2001 From: binarysaurus Date: Mon, 15 May 2017 16:53:53 -0500 Subject: [PATCH 20/22] Don't break generator --- Recon3DLoadData.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index b29e0c7..ca3d32e 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -57,11 +57,8 @@ def MergerGenerator(T2D, T3D): while True: s2d = T2D.Generator().next() s3d = T3D.Generator().next() - if s2d and s3d: - yield [s2d[0], s2d[1], s3d[0]] - else: - break - + yield [s2d[0], s2d[1], s3d[0]] + return MergerGenerator(Train_gen2D, Train_gen3D) From 3f8dba68c06950f3078b790586aa1252c6f066cc Mon Sep 17 00:00:00 2001 From: cloudy Date: Mon, 15 May 2017 18:23:07 -0500 Subject: [PATCH 21/22] PROGRESS --- Recon3DExperiment.py | 7 +++++-- Recon3DLoadData.py | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Recon3DExperiment.py b/Recon3DExperiment.py index ee4e772..6f1bbb8 100644 --- a/Recon3DExperiment.py +++ b/Recon3DExperiment.py @@ -28,6 +28,8 @@ NSamples = MaxEvents - NTestSamples +NSamples = BatchSize + # Function to help manage optional configurations. Checks and returns # if an object is in current scope. Return default value if not. 
def TestDefaultParam(Config): @@ -136,7 +138,7 @@ def TestParamPrime(param, default=False): View2Shape = (240, 256) ReconstructionModel = Model2DViewsTo3DDense(Name, View1Shape, View2Shape, Width, Depth, - BatchSize, NClasses, + BatchSize, 240*240*256, init=TestDefaultParam("WeightInitialization", 'normal'), # activation=TestDefaultParam("activation","relu"), Dropout=TestDefaultParam("DropoutLayers", 0.5), @@ -216,7 +218,8 @@ def TestParamPrime(param, default=False): verbose=verbose, #validation_data=Test_gen, #validation_steps=NTestSamples / BatchSize, - callbacks=callbacks) + #callbacks=callbacks + ) score = ReconstructionModel.Model.evaluate_generator(Test_gen, steps=NTestSamples / BatchSize) diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index ca3d32e..f21c1c1 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -55,11 +55,11 @@ def combined2D3DGenerator(): def MergerGenerator(T2D, T3D): while True: - s2d = T2D.Generator().next() - s3d = T3D.Generator().next() - yield [s2d[0], s2d[1], s3d[0]] + s2d = T2D.next() + s3d = T3D.next() + yield ([s2d[0], s2d[1]], [np.reshape(s3d[0], [128,240*240*256])]) - return MergerGenerator(Train_gen2D, Train_gen3D) + return MergerGenerator(Train_gen2D.Generator(), Train_gen3D.Generator()) From 25bcc09cf568274297e7e25bba11d539681272ba Mon Sep 17 00:00:00 2001 From: binarysaurus Date: Wed, 14 Jun 2017 00:19:27 -0500 Subject: [PATCH 22/22] Prime for cleaning --- Recon3DExperiment.py | 2 +- Recon3DLoadData.py | 125 +++++++++++++++++++++---------------------- 2 files changed, 61 insertions(+), 66 deletions(-) diff --git a/Recon3DExperiment.py b/Recon3DExperiment.py index 6f1bbb8..c91e959 100644 --- a/Recon3DExperiment.py +++ b/Recon3DExperiment.py @@ -50,7 +50,7 @@ def TestParamPrime(param, default=False): print "Loading 2D and 3D data" -Train_gen = combined2D3DGenerator() +Train_gen = combined2D3DGenerator(datapath = "/data/cloud/project/data/apr_9/") print "Testing 2D and 3D data load" diff --git a/Recon3DLoadData.py b/Recon3DLoadData.py index f21c1c1..8d3abab 100644 --- a/Recon3DLoadData.py +++ b/Recon3DLoadData.py @@ -4,65 +4,6 @@ from DLTools.ThreadedGenerator import DLMultiClassGenerator, DLMultiClassFilterGenerator -def combined2D3DGenerator(): - #datapath = "/data/datasets/LarTPC/apr_9/" - datapath = "/data/cloud/project/data/apr_9/" - # Pull in datafiles - filelist2d = glob.glob(datapath + "2d/*") - filelist3d = glob.glob(datapath + "3d/*") - filelist2d.sort() - filelist3d.sort() - assert len(filelist2d) == len(filelist3d), "Number of 2D and 3D files mismatch!" 
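
Some quick arithmetic on the 240*240*256 output adopted above explains both the reshape to [128, 240*240*256] and, presumably, why NSamples is pinned to a single batch while this runs: a dense decoder at full voxel resolution is enormous. The hidden width below is illustrative.

    n_out = 240 * 240 * 256          # 14,745,600 target units per event
    width = 512                      # illustrative hidden width
    weights = width * n_out          # ~7.5e9 parameters in the final Dense layer
    weights_gb = weights * 4 / 1e9   # ~30 GB of float32 weights alone
    batch_gb = 128 * n_out * 4 / 1e9 # ~7.5 GB per batch of flattened targets
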
- - try: - n_threads = int(sys.argv[1]) - except: - n_threads = 6 - - try: - n_threads2 = int(sys.argv[2]) - except: - n_threads2 = n_threads - - - Train_gen3D = LarTPCDataGenerator(filelist3d, n_threads=n_threads, max=100000, - bins=(240, 240, 256), verbose=False) - - DownSampleSize=8 - ScanWindowSize=256 - Normalize=True - closefiles=False - m = 1 - Train_gen2D =LArIATDataGenerator(FileSearch=datapath + "2d/*", - max=128*10000, - batchsize=128, - DownSampleSize=DownSampleSize, - ScanWindowSize=ScanWindowSize, - Norm=Normalize, - #shapes=[(128*m, 2, 240, 4096/DownSampleSize), (128*m, 16)], - #shapes=[(128*m, 240, ScanWindowSize), (128, 240, 256)], - #shapes=[(128*m, 2, 240, ScanWindowSize), (128*m, 16)], - #shapes=[(128*m, 240, ScanWindowSize)], - n_threads=n_threads, - SharedDataQueueSize=1, - multiplier=m, - closefiles=closefiles, - verbose=False, - timing=False, - sleep=1, - Wrap=False) - - - def MergerGenerator(T2D, T3D): - while True: - s2d = T2D.next() - s3d = T3D.next() - yield ([s2d[0], s2d[1]], [np.reshape(s3d[0], [128,240*240*256])]) - - return MergerGenerator(Train_gen2D.Generator(), Train_gen3D.Generator()) - - - def main(): #datapath = "/data/datasets/LarTPC/apr_9/" datapath = "/data/cloud/project/data/apr_9/" @@ -126,6 +67,64 @@ def main(): count += NN +def combined2D3DGenerator(datapath): + #datapath = "/data/datasets/LarTPC/apr_9/" + # Pull in datafiles + filelist2d = glob.glob(datapath + "2d/*") + filelist3d = glob.glob(datapath + "3d/*") + filelist2d.sort() + filelist3d.sort() + assert len(filelist2d) == len(filelist3d), "Number of 2D and 3D files mismatch!" + + try: + n_threads = int(sys.argv[1]) + except: + n_threads = 6 + + try: + n_threads2 = int(sys.argv[2]) + except: + n_threads2 = n_threads + + + Train_gen3D = LarTPCDataGenerator(filelist3d, n_threads=n_threads, max=100000, + bins=(240, 240, 256), verbose=False) + + DownSampleSize=8 + ScanWindowSize=256 + Normalize=True + closefiles=False + m = 1 + Train_gen2D =LArIATDataGenerator(FileSearch=datapath + "2d/*", + max=128*10000, + batchsize=128, + DownSampleSize=DownSampleSize, + ScanWindowSize=ScanWindowSize, + Norm=Normalize, + #shapes=[(128*m, 2, 240, 4096/DownSampleSize), (128*m, 16)], + #shapes=[(128*m, 240, ScanWindowSize), (128, 240, 256)], + #shapes=[(128*m, 2, 240, ScanWindowSize), (128*m, 16)], + #shapes=[(128*m, 240, ScanWindowSize)], + n_threads=n_threads, + SharedDataQueueSize=1, + multiplier=m, + closefiles=closefiles, + verbose=False, + timing=False, + sleep=1, + Wrap=False) + + + def MergerGenerator(T2D, T3D): + while True: + s2d = T2D.next() + s3d = T3D.next() + yield ([s2d[0], s2d[1]], [np.reshape(s3d[0], [128,240*240*256])]) + + return MergerGenerator(Train_gen2D.Generator(), Train_gen3D.Generator()) + + + def LArIATDataGenerator(FileSearch="/data/LArIAT/*.h5",DownSampleSize=4, ScanWindowSize=256,EnergyCut=0.61, datasetnames=[u'features'], Norm=False, MaxFiles=-1, **kwargs): @@ -160,9 +159,6 @@ def LArIATDataGenerator(FileSearch="/data/LArIAT/*.h5",DownSampleSize=4, ScanWin - - - def LarTPCDataGenerator(files="/data", is2D = False, batchsize=128, datasets=['images3D/C', 'images3D/V'], Norm=True, bins=None, EnergyCut=0.61, DownSampleSize = 2, ScanWindowSize = 256, **kwargs): Samples = [] @@ -288,6 +284,5 @@ def DownSample(y,factor,batchsize,sumabs=False): - -#if __name__ == '__main__': -# main() +if __name__ == '__main__': + main()
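
Finally, the crop_example helper retained above (credited to Peter Sadowski) selects the time window carrying the most charge using a cumulative-sum trick that evaluates every length-interval window in O(n). A self-contained illustration on toy data:

    import numpy as np

    def max_energy_window(energy, interval):
        # cumsum[k+interval] - cumsum[k] is the energy in window k, for all k at once
        cumsum = np.cumsum(energy, dtype='float64')
        intsum = cumsum[interval:] - cumsum[:-interval]
        return int(np.argmax(intsum))  # window start, same convention as crop_example

    energy = np.zeros(4096)
    energy[1000:1256] = 1.0                 # a 256-tick burst of charge
    start = max_energy_window(energy, 256)
    # start lands at the burst (999 here, one tick shy of the slice convention,
    # matching the off-by-one already present in crop_example)
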