diff --git a/Examples/OISTVisualizationTool/main.swift b/Examples/OISTVisualizationTool/main.swift index fab3a70c..51a9a826 100644 --- a/Examples/OISTVisualizationTool/main.swift +++ b/Examples/OISTVisualizationTool/main.swift @@ -44,9 +44,9 @@ struct ViewFrame: ParsableCommand { let image = dataset.loadFrame(frameRawId)! - plot(image, boxes: dataset.labels[frameId].enumerated().map { - (String($0), $1.location) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: dataset.labels[frameId].enumerated().map { + // (String($0), $1.location) + // }, margin: 10.0, scale: 0.5).show() } } @@ -118,7 +118,7 @@ struct RawTrack: ParsableCommand { var tracker = makeRawPixelTracker(frames: videos, target: startPatch) - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // For LM Optimizer: if verbose { tracker.optimizer.verbosity = .SUMMARY } let prediction = tracker.infer(knownStart: Tuple1(startPose)) @@ -150,9 +150,9 @@ struct RawTrack: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { @@ -207,11 +207,11 @@ struct PpcaTrack: ParsableCommand { startTimer("MAKE_GRAPH") var tracker = makePPCATracker(model: ppca, statistics: statistics, frames: videos, targetSize: (40, 70)) stopTimer("MAKE_GRAPH") + // For LM Optimizer + // if verbose { tracker.optimizer.verbosity = .SUMMARY } - if verbose { tracker.optimizer.verbosity = .SUMMARY } - - tracker.optimizer.cgls_precision = 1e-6 - tracker.optimizer.precision = 1e-2 + // tracker.optimizer.cgls_precision = 1e-6 + // tracker.optimizer.precision = 1e-2 startTimer("GRAPH_INFER") let prediction = tracker.infer(knownStart: Tuple2(startPose, Vector10(flatTensor: startLatent))) @@ -255,9 +255,9 @@ struct PpcaTrack: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { @@ -358,11 +358,12 @@ struct NaiveRae: ParsableCommand { stopTimer("MAKE_GRAPH") if verbose { print("Starting Optimization...") } - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // For LM Optimizer + // if verbose { tracker.optimizer.verbosity = .SUMMARY } - tracker.optimizer.cgls_precision = 1e-7 - tracker.optimizer.precision = 1e-4 - tracker.optimizer.max_iteration = 200 + // tracker.optimizer.cgls_precision = 1e-7 + // tracker.optimizer.precision = 1e-4 + // tracker.optimizer.max_iteration = 200 startTimer("GRAPH_INFER") let prediction = tracker.infer(knownStart: Tuple1(startPose)) @@ -402,9 +403,9 @@ struct NaiveRae: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { @@ -578,11 +579,12 @@ struct NaivePca: ParsableCommand { stopTimer("MAKE_GRAPH") if verbose { print("Starting Optimization...") } - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // For LM Optimizer + // if verbose { tracker.optimizer.verbosity = .SUMMARY } - tracker.optimizer.cgls_precision = 1e-7 -
tracker.optimizer.precision = 1e-4 - tracker.optimizer.max_iteration = 200 + // tracker.optimizer.cgls_precision = 1e-7 + // tracker.optimizer.precision = 1e-4 + // tracker.optimizer.max_iteration = 200 startTimer("GRAPH_INFER") let prediction = tracker.infer(knownStart: Tuple1(startPose)) @@ -622,9 +624,9 @@ struct NaivePca: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { diff --git a/Package.resolved b/Package.resolved index 03e8fe88..b25ddd9e 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,15 +1,6 @@ { "object": { "pins": [ - { - "package": "CSV.swift", - "repositoryURL": "https://github.com/yaslab/CSV.swift.git", - "state": { - "branch": null, - "revision": "81d2874c51db364d7e1d71b0d99018a294c87ac1", - "version": "2.4.3" - } - }, { "package": "Penguin", "repositoryURL": "https://github.com/saeta/penguin.git", @@ -19,15 +10,6 @@ "version": null } }, - { - "package": "Plotly", - "repositoryURL": "https://github.com/vojtamolda/Plotly.swift", - "state": { - "branch": null, - "revision": "6e80119ba37b913e5460459556e2bf58f02eba67", - "version": "0.4.0" - } - }, { "package": "swift-argument-parser", "repositoryURL": "https://github.com/apple/swift-argument-parser.git", @@ -46,24 +28,6 @@ "version": "0.1.0" } }, - { - "package": "swift-models", - "repositoryURL": "https://github.com/tensorflow/swift-models.git", - "state": { - "branch": null, - "revision": "b2fc0325bf9d476bf2d7a4cd0a09d36486c506e4", - "version": null - } - }, - { - "package": "SwiftProtobuf", - "repositoryURL": "https://github.com/apple/swift-protobuf.git", - "state": { - "branch": null, - "revision": "da9a52be9cd36c63993291ce3f1b65dafcd1e826", - "version": "1.14.0" - } - }, { "package": "swift-tools-support-core", "repositoryURL": "https://github.com/apple/swift-tools-support-core.git", diff --git a/Package.swift b/Package.swift index e7dc1074..762940f8 100644 --- a/Package.swift +++ b/Package.swift @@ -30,7 +30,6 @@ let package = Package( .package(name: "TensorBoardX", url: "https://github.com/ProfFan/tensorboardx-s4tf.git", from: "0.1.3"), .package(url: "https://github.com/apple/swift-tools-support-core.git", .branch("swift-5.2-branch")), .package(url: "https://github.com/apple/swift-argument-parser.git", from: "0.3.0"), - .package(name: "Plotly", url: "https://github.com/vojtamolda/Plotly.swift", from: "0.4.0"), ], targets: [ // Targets are the basic building blocks of a package. A target can define a module or a test suite. 
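Since Plotly.swift is dropped from the dependencies here and from the targets below, the `plot(...).show()` calls in main.swift lose their rendering backend, which is presumably why they are commented out above rather than ported. If those visualizations are still wanted, a matplotlib-over-PythonKit helper in the style the Scripts already use could stand in. A minimal sketch — the `plotBoxes` name, the `Tensor<Float>` frame type, and the corner-anchored rotation are my assumptions, not part of this PR:

```swift
import PythonKit
import SwiftFusion  // OrientedBoundingBox, Pose2
import TensorFlow   // Tensor

/// Hypothetical stand-in for the removed Plotly-based `plot(...)`: draws a
/// grayscale frame and overlays labeled oriented boxes with matplotlib.
func plotBoxes(frame: Tensor<Float>, boxes: [(String, OrientedBoundingBox)]) -> PythonObject {
  let plt = Python.import("matplotlib.pyplot")
  let np = Python.import("numpy")
  let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 8])).tuple2
  ax.imshow(np.squeeze(frame.makeNumpyArray()) / 255.0, cmap: "gray")
  for (label, box) in boxes {
    // Rectangle is anchored at a corner; the box stores its center pose.
    let x = box.center.t.x - Double(box.cols) / 2
    let y = box.center.t.y - Double(box.rows) / 2
    let rect = plt.Rectangle(
      Python.tuple([x, y]), Double(box.cols), Double(box.rows),
      // matplotlib rotates about the anchor corner, not the center —
      // an approximation that is fine for a quick overlay.
      angle: box.center.rot.theta * 180 / Double.pi, fill: false, color: "g")
    ax.add_patch(rect)
    ax.annotate(label, xy: Python.tuple([x, y]), color: "g")
  }
  return fig
}
```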
@@ -57,7 +56,6 @@ let package = Package( name: "BeeDataset", dependencies: [ "SwiftFusion", - "Plotly", "ModelSupport", ]), .target( @@ -86,7 +84,6 @@ let package = Package( "BeeTracking", .product(name: "PenguinParallelWithFoundation", package: "Penguin"), "SwiftFusion", - "Plotly", .product(name: "ArgumentParser", package: "swift-argument-parser"), ], path: "Examples/OISTVisualizationTool"), @@ -97,7 +94,6 @@ let package = Package( "BeeTracking", .product(name: "PenguinParallelWithFoundation", package: "Penguin"), "SwiftFusion", - "Plotly", .product(name: "ArgumentParser", package: "swift-argument-parser"), ], path: "Scripts", diff --git a/Scripts/Andrew01.swift b/Scripts/Andrew01.swift index fdcf8297..051aabb1 100644 --- a/Scripts/Andrew01.swift +++ b/Scripts/Andrew01.swift @@ -11,14 +11,11 @@ import PenguinStructures /// Andrew01: RAE Tracker struct Andrew01: ParsableCommand { - @Option(help: "Run on track number x") - var trackId: Int = 0 - @Option(help: "Run for number of frames") var trackLength: Int = 80 @Option(help: "Size of feature space") - var featureSize: Int = 5 + var featureSize: Int = 256 @Option(help: "Pretrained weights") var weightsFile: String? @@ -27,10 +24,12 @@ struct Andrew01: ParsableCommand { // Make sure you have a folder `Results/andrew01` before running func run() { let np = Python.import("numpy") - let kHiddenDimension = 100 + let kHiddenDimension = 512 let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + + var rae = DenseRAE( imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize @@ -42,20 +41,25 @@ struct Andrew01: ParsableCommand { rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) } + // let (imageHeight, imageWidth, imageChannels) = + // (40, 70, 1) + + // let rp = RandomProjection(fromShape: TensorShape([imageHeight, imageWidth, imageChannels]), toFeatureSize: featureSize) + + let trainingDatasetSize = 100 + let dataDir = URL(fileURLWithPath: "./OIST_Data") - let data = OISTBeeVideo(directory: dataDir, length: 100)! - let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! 
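+    // The split these two lines encode, as I read OISTBeeVideo(afterIndex:length:):
+    // frames [0, trainingDatasetSize) train the appearance model, and frames
+    // [trainingDatasetSize, trainingDatasetSize + trackLength) are only ever
+    // tracked, so training and evaluation frames stay disjoint.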
let trackerEvaluation = TrackerEvaluationDataset(testData) - let evalTracker: Tracker = {frames, start in - let trainingDatasetSize = 100 var tracker = trainProbabilisticTracker( trainingData: data, encoder: rae, frames: frames, boundingBoxSize: (40, 70), - withFeatureSize: 5, + withFeatureSize: featureSize, fgRandomFrameCount: trainingDatasetSize, bgRandomFrameCount: trainingDatasetSize ) @@ -64,32 +68,36 @@ return track } + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 1 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") - var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: 5, deltaAnchor: 175, outputFile: "andrew01") - - - for (index, value) in results.sequences.prefix(5).enumerated() { + for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { var i: Int = 0 zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { - let (fig, axes) = plotPatchWithGT(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center) + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) fig.savefig("Results/andrew01/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight") + plt.close("all") i = i + 1 } - let plt = Python.import("matplotlib.pyplot") + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") value.subsequences.map { - plotTrajectory( - track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0], - withTrackColors: plt.cm.jet, withGtColors: plt.cm.gray + plotPoseDifference( - wait + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] ) } plotOverlap( metrics: value.subsequences.first!.metrics, on: axes[1] ) - fig.savefig("Results/andrew01/andrew01_subsequence\(index).pdf", bbox_inches: "tight") + fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") @@ -103,4 +111,4 @@ fileprivate func unpack(_ t: Tuple2) -> (A, B) { /// Returns `t` as a Swift tuple. fileprivate func unpack(_ t: Tuple1) -> (A) { return (t.head) -} \ No newline at end of file +} diff --git a/Scripts/Andrew05.swift b/Scripts/Andrew05.swift new file mode 100644 index 00000000..cedd68b5 --- /dev/null +++ b/Scripts/Andrew05.swift @@ -0,0 +1,120 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Andrew05: Compares saved SiamMask and RAE tracker runs +struct Andrew05: ParsableCommand { + + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Size of feature space") + var featureSize: Int = 256 + + @Option(help: "Pretrained weights") + var weightsFile: String?
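Worth noting before `run()`: Andrew05 does not run a tracker at all — both closures below replay tracks that earlier runs serialized to JSON (Andrew06 writes `prediction_bigtransfer_sequence_<i>.json` the same way). The round-trip contract, pulled out of those call sites; the helper names are mine, but the encode/decode calls are exactly the ones used in these scripts, which also require `OrientedBoundingBox` to be `Codable`:

```swift
import Foundation
import SwiftFusion  // OrientedBoundingBox

/// Serialize one predicted track the way Andrew06/Brando03 do.
func saveTrack(_ track: [OrientedBoundingBox], to path: String) throws {
  let data = try JSONEncoder().encode(track)
  FileManager.default.createFile(atPath: path, contents: data, attributes: nil)
}

/// Load a previously saved track the way Andrew05's eval closures do.
func loadTrack(from path: String) throws -> [OrientedBoundingBox] {
  let data = try Data(contentsOf: URL(fileURLWithPath: path))
  return try JSONDecoder().decode([OrientedBoundingBox].self, from: data)
}
```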
+ + + // Comparison SiamMask and RAE + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let pickle = Python.import("pickle") + + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let testData2 = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + + let trackerEvaluation = TrackerEvaluationDataset(testData) + + var i = 0 + let evalTrackerSiam: Tracker = {frames, start in + let decoder = JSONDecoder() + let trackPath = "Results/brando03/prediction_siammask_sequence_\(i).json" + let decodedTrack = try! decoder.decode([OrientedBoundingBox].self, from: Data(contentsOf: URL(fileURLWithPath: trackPath))) + i = i + 1 + return decodedTrack + } + let sequenceCount = 19 + + var results_siam = trackerEvaluation.evaluate(evalTrackerSiam, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew02") + i = 0 + let trackerEvaluation2 = TrackerEvaluationDataset(testData2) + let evalTrackerRae: Tracker = {frames, start in + let decoder = JSONDecoder() + let trackPath = "rae_256_updated_preds/prediction_rae_256_sequence_\(i).json" + let decodedTrack = try! decoder.decode([OrientedBoundingBox].self, from: Data(contentsOf: URL(fileURLWithPath: trackPath))) + i = i + 1 + return decodedTrack + } + var results_rae = trackerEvaluation2.evaluate(evalTrackerRae, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") + + + + + // for (index, value) in results_rae.sequences.prefix(sequenceCount).enumerated() { + for j in 0...sequenceCount-1 { + let value_rae = results_rae.sequences.prefix(sequenceCount)[j] + let index = j + let value_siam = results_siam.sequences.prefix(sequenceCount)[j] + let value = value_rae + var i: Int = 0 + // zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + // fig.savefig("Results/brando03/sequence\(index)/brando03\(i).png", bbox_inches: "tight") + // plt.close("all") + // i = i + 1 + // } + zip(value_rae.subsequences.first!.frames, zip(zip(value_rae.subsequences.first!.prediction,value_siam.subsequences.first!.prediction), value_rae.subsequences.first!.groundTruth)).map { + // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + let (fig, _) = plotFrameWithPatches2(frame: $0.0, actual_box1: $0.1.0.0, actual_box2: $0.1.0.1, expected: $0.1.1.center, firstGroundTruth: value_rae.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/andrew01/sequence\(index)/comparison_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + // let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + // fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value_rae.subsequences.first!.metrics.accuracy)) and Robustness \(value_rae.subsequences.first!.metrics.robustness).") + // print("First Ground Truth") + // value_rae.subsequences.map { + // print($0.prediction.first!) 
+ // $0.prediction.map{print("\(round($0.center.t.x)) \(round($0.center.t.y)) \($0.center.rot.theta) \(40) \(70)")} + + // plotPoseDifference( + // track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + // ) + // } + // plotOverlap( + // metrics: value_rae.subsequences.first!.metrics, on: axes[1] + // ) + // fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value_rae.sequenceMetrics.accuracy) with Robustness of \(value_rae.sequenceMetrics.robustness)") + } + + // print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + // let f = Python.open("Results/EAO/rae_em_\(featureSize).data", "wb") + // pickle.dump(results.expectedAverageOverlap.curve, f) + + + } + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Andrew06.swift b/Scripts/Andrew06.swift new file mode 100644 index 00000000..337d4d4e --- /dev/null +++ b/Scripts/Andrew06.swift @@ -0,0 +1,362 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import ModelSupport + +import PenguinStructures + +let tf = Python.import("tensorflow") +let np = Python.import("numpy") +let pickle = Python.import("pickle") + +// Optional to enable GPU training +// let _ = _ExecutionContext.global +// let device = Device.defaultXLA +let device = Device.default +let modelName = "BiT-M-R50x1" +var knownModels = [String: String]() +let knownDatasetSizes:[String: (Int, Int)] = [ + "bee_dataset": (40, 70) +] + +public struct LabeledData { + /// The `data` of our sample (usually used as input for a model). + public let data: Data + /// The `label` of our sample (usually used as target for a model). + public let label: Label + + /// Creates an instance from `data` and `label`. + public init(data: Data, label: Label) { + self.data = data + self.label = label + } +} + +// Script to train and track with Big Transfer +struct Andrew06: ParsableCommand { + /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the + /// knownDatasetSizes dictionary + enum DatasetNotFoundError: Error { + case invalidInput(String) + } + + /// Return relevent ResNet enumerated type based on weights loaded + /// + /// - Parameters: + /// - modelName: the name of the model pulled from the big transfer repository + /// to grab the enumerated type for + /// - Returns: ResNet enumerated type for BigTransfer model + func getModelUnits(modelName: String) -> BigTransfer.Depth { + if modelName.contains("R50") { + return .resNet50 + } + else if modelName.contains("R101") { + return .resNet101 + } + else { + return .resNet152 + } + } + + /// Get updated image resolution based on the specifications in BiT-Hyperrule + /// + /// - Parameters: + /// - originalResolution: the source resolution for the current image dataset + /// - Returns: new resolution for images based on BiT-Hyperrule + func getResolution(originalResolution: (Int, Int)) -> (Int, Int) { + let area = originalResolution.0 * originalResolution.1 + return area < 96*96 ? 
(160, 128) : (512, 480) + } + + /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary + /// + /// - Parameters: + /// - datasetName: name of the current dataset you are using + /// - Returns: new resolution for specified dataset + /// - Throws: + /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary + func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) { + if let resolution = knownDatasetSizes[datasetName] { + return getResolution(originalResolution: resolution) + } + print("Unsupported dataset " + datasetName + ". Add your own here :)") + throw DatasetNotFoundError.invalidInput(datasetName) + + } + + /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: mixup alpha based on number of images + func getMixUp(datasetSize: Int) -> Double { + return datasetSize < 20000 ? 0.0 : 0.1 + } + + /// Get the learning rate schedule based on the dataset size + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: learning rate schedule based on the current dataset + func getSchedule(datasetSize: Int) -> Array { + if datasetSize < 20000{ + return [100, 200, 300, 400, 500] + } + else if datasetSize < 500000 { + return [500, 3000, 6000, 9000, 10000] + } + else { + return [500, 6000, 12000, 18000, 20000] + } + } + + /// Get learning rate at the current step given the dataset size and base learning rate + /// + /// - Parameters: + /// - step: current training step + /// - datasetSize: number of images in the dataset + /// - baseLearningRate: starting learning rate to modify + /// - Returns: learning rate at the current step in training + func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? { + let supports = getSchedule(datasetSize: datasetSize) + // Linear warmup + if step < supports[0] { + return baseLearningRate * Float(step) / Float(supports[0]) + } + // End of training + else if step >= supports.last! { + return nil + } + // Staircase decays by factor of 10 + else { + var baseLearningRate = baseLearningRate + for s in supports[1...] { + if s < step { + baseLearningRate = baseLearningRate / 10.0 + } + } + return baseLearningRate + } + } + public typealias Datum = (patch: Tensor, label: Tensor) + public typealias LabeledImage = LabeledData, Tensor> + public typealias Batches = Slices, label: Tensor)], ArraySlice>> + + func getTrainingDataBigTransfer( + from dataset: OISTBeeVideo, + numberForeground: Int = 10000, + numberBackground: Int = 10000 + ) -> [Datum] { + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(0)) + } + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(1)) + } + + var boxes = fgBoxes + bgBoxes + return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)} + } + + /// Stores the training statistics for the BigTransfer training process which are different than usual + /// because the mixedup labels must be accounted for while running training statistics. 
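+  /// With mixup effectively disabled here (BiT-Hyperrule sets alpha = 0 for
+  /// datasets under 20k images) the probability labels are plain one-hot rows,
+  /// so `labels.argmax(squeezingAxis: 1)` in `update` recovers the class index
+  /// exactly; with mixup on it would pick each mixed pair's dominant class.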
+ struct BigTransferTrainingStatistics { + var correctGuessCount = Tensor(0, on: Device.default) + var totalGuessCount = Tensor(0, on: Device.default) + var totalLoss = Tensor(0, on: Device.default) + var batches: Int = 0 + var accuracy: Float { + Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100 + } + var averageLoss: Float { totalLoss.scalarized() / Float(batches) } + + init(on device: Device = Device.default) { + correctGuessCount = Tensor(0, on: device) + totalGuessCount = Tensor(0, on: device) + totalLoss = Tensor(0, on: device) + } + + mutating func update(logits: Tensor, labels: Tensor, loss: Tensor) { + let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1) + correctGuessCount += Tensor(correct).sum() + totalGuessCount += Int32(labels.shape[0]) + totalLoss += loss + batches += 1 + } + } + + fileprivate func makeBatch( + samples: BatchSamples, device: Device) -> LabeledImage where BatchSamples.Element == (patch: Tensor, label: Tensor) { + let labels = Tensor(samples.map(\.label)) + let imageTensor = Tensor(samples.map(\.patch)) + return LabeledImage(data: imageTensor, label: labels) +} + // Train Big Transfer + func run() { + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)! + let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)! + + let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 20000, numberBackground: 20000) + let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600) + + + let classCount = 2 + var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName) + let dataCount = 40000 + + var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9) + optimizer = SGD(copying: optimizer, to: device) + + print("Beginning training...") + var batchSize: Int = 16 + var currStep: Int = 1 + let lrSupports = getSchedule(datasetSize: dataCount) + let scheduleLength = lrSupports.last! 
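+    // The two lines below work out to (my arithmetic, not PR output):
+    // stepsPerEpoch = 40_000 / 16 = 2_500, and the BiT-Hyperrule schedule for
+    // datasets under 500k images ends at step 10_000, so epochCount =
+    // 10_000 / 2_500 = 4 full passes over the training patches.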
+ let stepsPerEpoch = dataCount / batchSize + let epochCount = scheduleLength / stepsPerEpoch + let resizeSize = getResolution(originalResolution: (40, 70)) + + let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map { + (batches: Batches) -> LazyMapSequence in + return batches.lazy.map{ makeBatch(samples: $0, device: device) } + } + + let validationData = validation.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device) + } + + for (epoch, batches) in trainingData.prefix(epochCount).enumerated() { + let start = Date() + var trainStats = BigTransferTrainingStatistics(on: device) + var testStats = BigTransferTrainingStatistics(on: device) + + Context.local.learningPhase = .training + for batch in batches { + if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) { + optimizer.learningRate = newLearningRate + currStep = currStep + 1 + } + else { + continue + } + + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray()) + var newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + + let images = Tensor(copying: Tensor(numpy: flipped.numpy())!, to: device) + let labels = Tensor(copying: newLabels, to: device) + let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor in + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + trainStats.update(logits: ŷ, labels: labels, loss: loss) + return loss + } + + optimizer.update(&bitModel, along: 𝛁model) + + LazyTensorBarrier() + } + + print("Checking validation statistics...") + Context.local.learningPhase = .inference + for batch in validationData { + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + let images = Tensor(copying: resized, to: device) + let labels = Tensor(copying: newLabels, to: device) + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + LazyTensorBarrier() + testStats.update(logits: ŷ, labels: labels, loss: loss) + } + + print( + """ + [Epoch \(epoch)] \ + Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \ + Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \ + (\(String(format: "%.1f", trainStats.accuracy))%), \ + Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \ + Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \ + (\(String(format: "%.1f", testStats.accuracy))%) \ + seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start))) + """) + } + + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
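+    // Tracing getLearningRate for dataCount = 40_000 (schedule
+    // [500, 3000, 6000, 9000, 10000]; my working, not PR output):
+    //   step < 500        linear warmup, e.g. 0.003 * 250 / 500 = 0.0015
+    //   500 ... 3000      0.003
+    //   3001 ... 6000     0.0003
+    //   6001 ... 9000     0.00003
+    //   9001 ... 9999     0.000003
+    //   step >= 10000     nil, so the batch loop above stops updating weights.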
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + + + let evalTracker: Tracker = {frames, start in + var tracker = makeProbabilisticTracker2( + model: bitModel, + frames: frames, + targetSize: (40, 70) + ) + let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true) + let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols:70) } + return track + + } + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 19 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") + + for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/andrew01/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + let encoder = JSONEncoder() + let data = try! encoder.encode($0.prediction) + FileManager.default.createFile(atPath: "prediction_bigtransfer_sequence_\(index).json", contents: data, attributes: nil) + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + let f = Python.open("Results/EAO/bigtransfer.data", "wb") + pickle.dump(results.expectedAverageOverlap.curve, f) + + + } + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. 
+fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Andrew07.swift b/Scripts/Andrew07.swift new file mode 100644 index 00000000..83eb3ddb --- /dev/null +++ b/Scripts/Andrew07.swift @@ -0,0 +1,342 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import ModelSupport + +import PenguinStructures + +// Error gradient visualization script for Big Transfer +struct Andrew07: ParsableCommand { + /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the + /// knownDatasetSizes dictionary + enum DatasetNotFoundError: Error { + case invalidInput(String) + } + + /// Return relevent ResNet enumerated type based on weights loaded + /// + /// - Parameters: + /// - modelName: the name of the model pulled from the big transfer repository + /// to grab the enumerated type for + /// - Returns: ResNet enumerated type for BigTransfer model + func getModelUnits(modelName: String) -> BigTransfer.Depth { + if modelName.contains("R50") { + return .resNet50 + } + else if modelName.contains("R101") { + return .resNet101 + } + else { + return .resNet152 + } + } + + /// Get updated image resolution based on the specifications in BiT-Hyperrule + /// + /// - Parameters: + /// - originalResolution: the source resolution for the current image dataset + /// - Returns: new resolution for images based on BiT-Hyperrule + func getResolution(originalResolution: (Int, Int)) -> (Int, Int) { + let area = originalResolution.0 * originalResolution.1 + return area < 96*96 ? (160, 128) : (512, 480) + } + + /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary + /// + /// - Parameters: + /// - datasetName: name of the current dataset you are using + /// - Returns: new resolution for specified dataset + /// - Throws: + /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary + func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) { + if let resolution = knownDatasetSizes[datasetName] { + return getResolution(originalResolution: resolution) + } + print("Unsupported dataset " + datasetName + ". Add your own here :)") + throw DatasetNotFoundError.invalidInput(datasetName) + + } + + /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: mixup alpha based on number of images + func getMixUp(datasetSize: Int) -> Double { + return datasetSize < 20000 ? 
0.0 : 0.1 + } + + /// Get the learning rate schedule based on the dataset size + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: learning rate schedule based on the current dataset + func getSchedule(datasetSize: Int) -> Array { + if datasetSize < 20000{ + return [100, 200, 300, 400, 500] + } + else if datasetSize < 500000 { + return [500, 3000, 6000, 9000, 10000] + } + else { + return [500, 6000, 12000, 18000, 20000] + } + } + + /// Get learning rate at the current step given the dataset size and base learning rate + /// + /// - Parameters: + /// - step: current training step + /// - datasetSize: number of images in the dataset + /// - baseLearningRate: starting learning rate to modify + /// - Returns: learning rate at the current step in training + func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? { + let supports = getSchedule(datasetSize: datasetSize) + // Linear warmup + if step < supports[0] { + return baseLearningRate * Float(step) / Float(supports[0]) + } + // End of training + else if step >= supports.last! { + return nil + } + // Staircase decays by factor of 10 + else { + var baseLearningRate = baseLearningRate + for s in supports[1...] { + if s < step { + baseLearningRate = baseLearningRate / 10.0 + } + } + return baseLearningRate + } + } + public typealias Datum = (patch: Tensor, label: Tensor) + public typealias LabeledImage = LabeledData, Tensor> + public typealias Batches = Slices, label: Tensor)], ArraySlice>> + + func getTrainingDataBigTransfer( + from dataset: OISTBeeVideo, + numberForeground: Int = 10000, + numberBackground: Int = 10000 + ) -> [Datum] { + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(0)) + } + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(1)) + } + + var boxes = fgBoxes + bgBoxes + return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)} + } + + /// Stores the training statistics for the BigTransfer training process which are different than usual + /// because the mixedup labels must be accounted for while running training statistics. 
+ struct BigTransferTrainingStatistics { + var correctGuessCount = Tensor(0, on: Device.default) + var totalGuessCount = Tensor(0, on: Device.default) + var totalLoss = Tensor(0, on: Device.default) + var batches: Int = 0 + var accuracy: Float { + Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100 + } + var averageLoss: Float { totalLoss.scalarized() / Float(batches) } + + init(on device: Device = Device.default) { + correctGuessCount = Tensor(0, on: device) + totalGuessCount = Tensor(0, on: device) + totalLoss = Tensor(0, on: device) + } + + mutating func update(logits: Tensor, labels: Tensor, loss: Tensor) { + let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1) + correctGuessCount += Tensor(correct).sum() + totalGuessCount += Int32(labels.shape[0]) + totalLoss += loss + batches += 1 + } + } + + fileprivate func makeBatch( + samples: BatchSamples, device: Device) -> LabeledImage where BatchSamples.Element == (patch: Tensor, label: Tensor) { + let labels = Tensor(samples.map(\.label)) + let imageTensor = Tensor(samples.map(\.patch)) + return LabeledImage(data: imageTensor, label: labels) +} + // Train Big Transfer + func run() { + let plt = Python.import("matplotlib.pyplot") + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)! + let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)! + + let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 3000, numberBackground: 3000) + let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600) + + + let classCount = 2 + var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName) + let dataCount = 6000 + + var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9) + optimizer = SGD(copying: optimizer, to: device) + + print("Beginning training...") + var batchSize: Int = 16 + var currStep: Int = 1 + let lrSupports = getSchedule(datasetSize: dataCount) + let scheduleLength = lrSupports.last! 
+ let stepsPerEpoch = dataCount / batchSize + let epochCount = scheduleLength / stepsPerEpoch + let resizeSize = getResolution(originalResolution: (40, 70)) + + let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map { + (batches: Batches) -> LazyMapSequence in + return batches.lazy.map{ makeBatch(samples: $0, device: device) } + } + + let validationData = validation.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device) + } + + for (epoch, batches) in trainingData.prefix(epochCount).enumerated() { + let start = Date() + var trainStats = BigTransferTrainingStatistics(on: device) + var testStats = BigTransferTrainingStatistics(on: device) + + Context.local.learningPhase = .training + for batch in batches { + if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) { + optimizer.learningRate = newLearningRate + currStep = currStep + 1 + } + else { + continue + } + + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + //let cropped = tf.image.random_crop(resized.makeNumpyArray(), [batchSize, resizeSize.0, resizeSize.1, 3]) + let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray()) + var newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + + let images = Tensor(copying: Tensor(numpy: flipped.numpy())!, to: device) + let labels = Tensor(copying: newLabels, to: device) + let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor in + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + trainStats.update(logits: ŷ, labels: labels, loss: loss) + return loss + } + + optimizer.update(&bitModel, along: 𝛁model) + + LazyTensorBarrier() + } + + print("Checking validation statistics...") + Context.local.learningPhase = .inference + for batch in validationData { + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + let images = Tensor(copying: resized, to: device) + let labels = Tensor(copying: newLabels, to: device) + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + LazyTensorBarrier() + testStats.update(logits: ŷ, labels: labels, loss: loss) + } + + print( + """ + [Epoch \(epoch)] \ + Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \ + Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \ + (\(String(format: "%.1f", trainStats.accuracy))%), \ + Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \ + Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \ + (\(String(format: "%.1f", testStats.accuracy))%) \ + seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start))) + """) + } + + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
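The visualization that follows raster-scans a 100x100 window of translations around the first ground-truth pose and heat-maps the factor error (min-shifted, then scaled to [0, 255] before `imshow`). Its inner kernel, reduced to a single evaluation with the same calls as the nested loop below — shown here at the ground-truth location itself:

```swift
// Score one candidate pose under the classifier-backed factor: one cell of
// the error surface. firstGroundTruth, firstFrame, and bitModel are the
// values set up in the surrounding run().
let (cx, cy) = (firstGroundTruth.t.x, firstGroundTruth.t.y)
var v = VariableAssignments()
let poseId = v.store(Pose2(firstGroundTruth.rot, Vector2(cx, cy)))
var fg = FactorGraph()
let factorNNC = ProbablisticTrackingFactor2(
  poseId,
  measurement: firstFrame,   // full frame; the factor presumably crops a 40x70 patch at the pose
  classifier: bitModel,
  patchSize: (40, 70),
  appearanceModelSize: (40, 70))
fg.store(factorNNC)
let cellError = factorNNC.errorVector(v[poseId]).x  // the scalar plotted in the heat map
```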
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + let frames = testData.frames + let firstTrack = testData.tracks[0] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + + let range = 100.0 + + let firstGroundTruth = firstObb.center + print("oBB coordinates", firstGroundTruth.t.x, firstGroundTruth.t.y) + + let (fig, axs) = plt.subplots(1,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + axs[0].imshow(fr / 255.0, cmap: "gray") + + + axs[0].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2) + axs[0].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2) + axs[1].set_xlim(0, range) + axs[1].set_ylim(0, range) + + let x = firstGroundTruth.t.x + let y = firstGroundTruth.t.y + + var values = Tensor(zeros: [Int(range), Int(range)]) + + for i in 0...Int(range)-1 { + for j in 0...Int(range)-1 { + let t = Vector2(x-range/2+Double(i), y-range/2+Double(j)) + let p = Pose2(firstGroundTruth.rot, t) + var v = VariableAssignments() + let poseId = v.store(p) + let startpose = v[poseId] + var fg = FactorGraph() + let factorNNC = ProbablisticTrackingFactor2(poseId, + measurement: firstFrame, + classifier: bitModel, + patchSize: (40, 70), + appearanceModelSize: (40, 70) + ) + fg.store(factorNNC) + values[i,j] = Tensor(factorNNC.errorVector(v[poseId]).x) + print(j) + print(i) + } + } + let min_val = values.min() + if Double(min_val)! < 0 { + values = values-min_val + } + values = values/values.max()*255 + print(values[0...,0]) + print(values.shape) + axs[1].imshow(values.makeNumpyArray()) + + fig.savefig("./Results/andrew01/vizual_NNC.png", bbox_inches: "tight") + } + + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. 
+fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Andrew08.swift b/Scripts/Andrew08.swift new file mode 100644 index 00000000..d9c24018 --- /dev/null +++ b/Scripts/Andrew08.swift @@ -0,0 +1,439 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import ModelSupport + +import PenguinStructures + + +struct Andrew08: ParsableCommand { + /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the + /// knownDatasetSizes dictionary + enum DatasetNotFoundError: Error { + case invalidInput(String) + } + func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) { + var v = VariableAssignments() + let poseId = v.store(p) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - p.t.x + let dy = v[poseId].t.y - p.t.y + let dtheta = v[poseId].rot.theta - p.rot.theta + let startpose = v[poseId] + let fg = FactorGraph() + + return (dx, dy, dtheta, startpose, v, poseId, fg) + } + + func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) { + var conv = true + var errors = [Double]() + var xs = [Double]() + var ys = [Double]() + var thetas = [Double]() + return (conv, errors, xs, ys, thetas) + } + /// Return relevent ResNet enumerated type based on weights loaded + /// + /// - Parameters: + /// - modelName: the name of the model pulled from the big transfer repository + /// to grab the enumerated type for + /// - Returns: ResNet enumerated type for BigTransfer model + func getModelUnits(modelName: String) -> BigTransfer.Depth { + if modelName.contains("R50") { + return .resNet50 + } + else if modelName.contains("R101") { + return .resNet101 + } + else { + return .resNet152 + } + } + + /// Get updated image resolution based on the specifications in BiT-Hyperrule + /// + /// - Parameters: + /// - originalResolution: the source resolution for the current image dataset + /// - Returns: new resolution for images based on BiT-Hyperrule + func getResolution(originalResolution: (Int, Int)) -> (Int, Int) { + let area = originalResolution.0 * originalResolution.1 + return area < 96*96 ? (160, 128) : (512, 480) + } + + /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary + /// + /// - Parameters: + /// - datasetName: name of the current dataset you are using + /// - Returns: new resolution for specified dataset + /// - Throws: + /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary + func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) { + if let resolution = knownDatasetSizes[datasetName] { + return getResolution(originalResolution: resolution) + } + print("Unsupported dataset " + datasetName + ". Add your own here :)") + throw DatasetNotFoundError.invalidInput(datasetName) + + } + + /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: mixup alpha based on number of images + func getMixUp(datasetSize: Int) -> Double { + return datasetSize < 20000 ? 
0.0 : 0.1 + } + + /// Get the learning rate schedule based on the dataset size + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: learning rate schedule based on the current dataset + func getSchedule(datasetSize: Int) -> Array { + if datasetSize == 100 { + return [25, 50, 75, 100] + } + if datasetSize < 20000{ + return [100, 200, 300, 400, 500] + } + else if datasetSize < 500000 { + return [500, 3000, 6000, 9000, 10000] + } + else { + return [500, 6000, 12000, 18000, 20000] + } + } + + /// Get learning rate at the current step given the dataset size and base learning rate + /// + /// - Parameters: + /// - step: current training step + /// - datasetSize: number of images in the dataset + /// - baseLearningRate: starting learning rate to modify + /// - Returns: learning rate at the current step in training + func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? { + let supports = getSchedule(datasetSize: datasetSize) + // Linear warmup + print(step) + print(supports) + if step < supports[0] { + return baseLearningRate * Float(step) / Float(supports[0]) + } + // End of training + else if step >= supports.last! { + return nil + } + // Staircase decays by factor of 10 + else { + var baseLearningRate = baseLearningRate + for s in supports[1...] { + if s < step { + baseLearningRate = baseLearningRate / 10.0 + } + } + return baseLearningRate + } + } + public typealias Datum = (patch: Tensor, label: Tensor) + public typealias LabeledImage = LabeledData, Tensor> + public typealias Batches = Slices, label: Tensor)], ArraySlice>> + + func getTrainingDataBigTransfer( + from dataset: OISTBeeVideo, + numberForeground: Int = 10000, + numberBackground: Int = 10000 + ) -> [Datum] { + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(0)) + } + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(1)) + } + + var boxes = fgBoxes + bgBoxes + return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)} + } + + /// Stores the training statistics for the BigTransfer training process which are different than usual + /// because the mixedup labels must be accounted for while running training statistics. 
+ struct BigTransferTrainingStatistics { + var correctGuessCount = Tensor(0, on: Device.default) + var totalGuessCount = Tensor(0, on: Device.default) + var totalLoss = Tensor(0, on: Device.default) + var batches: Int = 0 + var accuracy: Float { + Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100 + } + var averageLoss: Float { totalLoss.scalarized() / Float(batches) } + + init(on device: Device = Device.default) { + correctGuessCount = Tensor(0, on: device) + totalGuessCount = Tensor(0, on: device) + totalLoss = Tensor(0, on: device) + } + + mutating func update(logits: Tensor, labels: Tensor, loss: Tensor) { + let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1) + correctGuessCount += Tensor(correct).sum() + totalGuessCount += Int32(labels.shape[0]) + totalLoss += loss + batches += 1 + } + } + + fileprivate func makeBatch( + samples: BatchSamples, device: Device) -> LabeledImage where BatchSamples.Element == (patch: Tensor, label: Tensor) { + let labels = Tensor(samples.map(\.label)) + let imageTensor = Tensor(samples.map(\.patch)) + return LabeledImage(data: imageTensor, label: labels) +} + // Train Big Transfer + func run() { + let plt = Python.import("matplotlib.pyplot") + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)! + let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)! + + let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 256, numberBackground: 256) + let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600) + + + let classCount = 2 + var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName) + let dataCount = 6000 + + var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9) + optimizer = SGD(copying: optimizer, to: device) + + print("Beginning training...") + var batchSize: Int = 16 + var currStep: Int = 1 + let lrSupports = getSchedule(datasetSize: dataCount) + let scheduleLength = lrSupports.last! 
+ let stepsPerEpoch = dataCount / batchSize + let epochCount = scheduleLength / stepsPerEpoch + let resizeSize = getResolution(originalResolution: (40, 70)) + + let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map { + (batches: Batches) -> LazyMapSequence in + return batches.lazy.map{ makeBatch(samples: $0, device: device) } + } + + let validationData = validation.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device) + } + + for (epoch, batches) in trainingData.prefix(epochCount).enumerated() { + let start = Date() + var trainStats = BigTransferTrainingStatistics(on: device) + var testStats = BigTransferTrainingStatistics(on: device) + + Context.local.learningPhase = .training + for batch in batches { + if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) { + optimizer.learningRate = newLearningRate + currStep = currStep + 1 + } + else { + continue + } + + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray()) + var newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + + let images = Tensor(copying: Tensor(numpy: flipped.numpy())!, to: device) + let labels = Tensor(copying: newLabels, to: device) + let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor in + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + trainStats.update(logits: ŷ, labels: labels, loss: loss) + return loss + } + + optimizer.update(&bitModel, along: 𝛁model) + + LazyTensorBarrier() + } + + print("Checking validation statistics...") + Context.local.learningPhase = .inference + for batch in validationData { + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + let images = Tensor(copying: resized, to: device) + let labels = Tensor(copying: newLabels, to: device) + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + LazyTensorBarrier() + testStats.update(logits: ŷ, labels: labels, loss: loss) + } + + print( + """ + [Epoch \(epoch)] \ + Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \ + Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \ + (\(String(format: "%.1f", trainStats.accuracy))%), \ + Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \ + Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \ + (\(String(format: "%.1f", testStats.accuracy))%) \ + seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start))) + """) + } + + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
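The perturbation study below repeats one pattern many times: perturb the ground-truth pose, attach a single classifier factor, run gradient descent, and check whether the pose lands back inside `xy_thresh`/`theta_thresh`. One trial, sketched with the helpers defined at the top of this file and the `GDOptimizer`/`it_limit` set up just below; the exact `GDOptimizer.update(&v, objective: fg)` spelling is my assumption about SwiftFusion's `GradientDescent` API, inferred from its use here:

```swift
// One perturb-and-descend trial (reviewer sketch, not the PR's loop).
// dx/dy/dtheta record the perturbation that was applied, for logging.
var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center)
let factorNNC = ProbablisticTrackingFactor2(
  poseId, measurement: firstFrame, classifier: bitModel,
  patchSize: (40, 70), appearanceModelSize: (40, 70))
fg.store(factorNNC)
var (conv, errors, xs, ys, thetas) = initialize_empty_arrays()
for _ in 0..<it_limit {
  GDOptimizer.update(&v, objective: fg)  // assumed SwiftFusion API
  errors.append(factorNNC.errorVector(v[poseId]).x)
  xs.append(v[poseId].t.x)
  ys.append(v[poseId].t.y)
  thetas.append(v[poseId].rot.theta)
}
// Success = v[poseId] descended back to within xy_thresh / theta_thresh of
// firstObb.center; compare factorNNC.errorVector(startpose).x with errors.last.
```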
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + let frames = testData.frames + let firstTrack = testData.tracks[0] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + + let lr = 100.0 + var GDOptimizer = GradientDescent(learningRate: lr) + let it_limit = 80 + + + let folderName = "Results/GD_optimization_BiT_lr_\(lr)__10_22_2021_final_images_4subplots" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + print("hello1") + let (fig, axs) = plt.subplots(2,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + for i in 0...1 { + for j in 0...1 { + axs[i,j].imshow(fr / 255.0, cmap: "gray") + let firstGroundTruth = firstObb.center + axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50) + axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50) + axs[i,j].get_xaxis().set_visible(false) + axs[i,j].get_yaxis().set_visible(false) + } + } + axs[0,0].set_title("fabs(theta) < 6 Degrees", fontsize:8) + axs[0,1].set_title("fabs(theta) < 12 Degrees", fontsize:8) + axs[1,0].set_title("fabs(theta) < 16 Degrees", fontsize:8) + axs[1,1].set_title("fabs(theta) >= 16 Degrees", fontsize:8) + + print("hello") + let xy_thresh = 20.0 //pixels + let theta_thresh = 0.5 //radians // consider doing overlap. + + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + + var useClassifier = true + if useClassifier { + var classifier = bitModel + for j in 0...200 { + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + let factorNNC = ProbablisticTrackingFactor2(poseId, + measurement: firstFrame, + classifier: classifier, + patchSize: (40, 70), + appearanceModelSize: (40, 70) + ) + fg.store(factorNNC) + + + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization") + for i in 0.. 
firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 5) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 5) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 5) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 5) + } + + } else { + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 5) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 5) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 5) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 5) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + + final_err = factorNNC.errorVector(v[poseId]).x + label_err = factorNNC.errorVector(firstObb.center).x + start_err = factorNNC.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + } + } +} + + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Brando01.swift b/Scripts/Brando01.swift new file mode 100644 index 00000000..0a4b9340 --- /dev/null +++ b/Scripts/Brando01.swift @@ -0,0 +1,102 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando01: Tracker OpenCV +struct Brando01: ParsableCommand { + func run() { + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: 100)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
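Brando01 wraps OpenCV's MIL tracker in the same `Tracker` closure signature the RAE and BiT scripts use, so `TrackerEvaluationDataset.evaluate` can score all of them identically. The actual typealias lives in BeeTracking; the spelling below is my reading of the call sites in these scripts, and the frame element type in particular is an assumption:

```swift
// Assumed shape of the Tracker typealias used throughout these scripts:
// given a subsequence's frames and the starting box, return one predicted
// box per frame.
typealias Tracker =
  ([Tensor<Float>], OrientedBoundingBox) -> [OrientedBoundingBox]
```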
+ print("number of frames in training data:", data.labels.count) + print("number of frames in testing data", testData.labels.count, "\n\n") + + + let trackerEvaluation = TrackerEvaluationDataset(testData) + + let np = Python.import("numpy") + let cv2 = Python.import("cv2") + + + let evalTracker: Tracker = {frames, start in + + let tracker = cv2.TrackerMIL_create() + + var BB = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-35, 40, 70]) + var smallframe = np.array(frames.first!.makeNumpyArray()) + let leftpt = Python.tuple([Int(start.center.t.x)-35, Int(start.center.t.y)-35]) + let rgtpt = Python.tuple([Int(start.center.t.x)+35, Int(start.center.t.y)+35]) + cv2.rectangle(smallframe, leftpt, rgtpt, Python.tuple([0,150,0]), 5) + cv2.imwrite("./image_new.png", smallframe) + tracker[dynamicMember: "init"](frames.first!.makeNumpyArray(), BB) + var results = [PythonObject]() + for (index, frame) in frames.enumerated() { + var a = tracker[dynamicMember: "update"](frame.makeNumpyArray()).tuple2 + let track_success = a.0 + let newBB = a.1 + if Bool(track_success)! { + results.append(newBB) + } + + + + } + print("printing python BB") + var track = [OrientedBoundingBox]() + for result in results { + let pythonBB = result.tuple4 + let rows = Int(pythonBB.2)! + let cols = Int(pythonBB.3)! + let rot = Rot2(0) + let vect = Vector2(Double(pythonBB.0)!+20, Double(pythonBB.1)!+35) + let center = Pose2(rot, vect) + let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols) + track.append(swiftBB) + } + return track + } + + + + + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 1 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: 1, deltaAnchor: 175, outputFile: "brando01") + for (index, value) in results.sequences.prefix(1).enumerated() { + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/brando01/sequence\(index)/brando01\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + fig.savefig("Results/brando01/brando01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + + } +} \ No newline at end of file diff --git a/Scripts/Brando02.swift b/Scripts/Brando02.swift new file mode 100644 index 00000000..ad027cb2 --- /dev/null +++ b/Scripts/Brando02.swift @@ -0,0 +1,66 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando02 OpenCV tracker 
+struct Brando02: ParsableCommand {
+  func run() {
+    let cv2 = Python.import("cv2")
+    let os = Python.import("os")
+    let image_names = os.listdir("../OIST_Data/downsampled")
+    let track_names = os.listdir("../OIST_Data/tracks")
+    image_names.sort()
+    track_names.sort()
+    let track = track_names[10]
+    let frame = cv2.imread("../OIST_Data/downsampled/" + image_names[0])
+    let centers = Python.list()
+    let fs = Python.open("../OIST_Data/tracks/" + track, "r")
+    let lines = fs.readlines()
+    print("track file has", Python.len(lines), "lines")
+    // The first line of a track file is a header; the rest are "x y ..." rows.
+    var isHeader = true
+    for line in lines {
+      if isHeader {
+        isHeader = false
+        continue
+      }
+      let nums = (String(line) ?? "").components(separatedBy: " ")
+      guard nums.count >= 2, let width = Float(nums[0]), let height = Float(nums[1]) else { continue }
+      centers.append(Python.tuple([Python.float(width), Python.float(height)]))
+    }
+
+    let width = Float(centers[0][0]) ?? 0
+    let height = Float(centers[0][1]) ?? 0
+    let BB = Python.tuple([Int(width - 35), Int(height - 35), 70, 70])
+    let tracker = cv2.TrackerMIL_create()
+    // `init` is a reserved word in Swift, so the Python method is reached
+    // through the dynamic-member subscript.
+    tracker[dynamicMember: "init"](frame, BB)
+    var results = [PythonObject]()
+    for image_name in image_names {
+      let framei = cv2.imread("../OIST_Data/downsampled/" + image_name)
+      let (track_success, newBB) = tracker[dynamicMember: "update"](framei).tuple2
+      if Bool(track_success)! {
+        results.append(newBB)
+      }
+    }
+    // `results` now holds one axis-aligned box per successfully tracked frame;
+    // this script stops here, leaving plotting and evaluation to Brando01.
+  }
+}
\ No newline at end of file
diff --git a/Scripts/Brando03.swift b/Scripts/Brando03.swift
new file mode 100644
index 00000000..cc6b4f3b
--- /dev/null
+++ b/Scripts/Brando03.swift
@@ -0,0 +1,206 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando03: SiamMask tracker
+struct Brando03: ParsableCommand {
+
+  func run() {
+
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!
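+    // The `SiamMask.*` modules below are imported through PythonKit, which
+    // assumes the SiamMask checkout is already importable. If it is not on the
+    // Python path, something like this (hypothetical layout) is needed first:
+    //   let sys = Python.import("sys")
+    //   sys.path.append("../SiamMask")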
+ print("number of frames in testing data", testData.labels.count, "\n\n") + + + let trackerEvaluation = TrackerEvaluationDataset(testData) + let os = Python.import("os") + let torch = Python.import("torch") + + let np = Python.import("numpy") + let smtools = Python.import("SiamMask.tools") + let smutils = Python.import("SiamMask.utils") + let cfhelper = Python.import("SiamMask.utils.config_helper") + let ldhelper = Python.import("SiamMask.utils.load_helper") + let smtest = Python.import("SiamMask.tools.test") + + + let cv2 = Python.import("cv2") + + let argparse = Python.import("argparse") + let parser = argparse.ArgumentParser() + + parser.add_argument("--resume") + parser.add_argument("--config") + parser.add_argument("--base_path") + let args = parser.parse_args(["--resume", "../SiamMask/model_sharp/checkpoint_e20.pth", "--config", "../SiamMask/experiments/siammask_sharp/config_vot.json", "--base_path", "./OIST_Data/downsampled"]) + + print("ARGUMENTS", args) + + + print(Python.version) + print("hello") + let evalTracker: Tracker = { frames, start in + + //SIAM MASK TRACKER IS HERE + let device = torch.device("cpu") + torch.backends.cudnn.benchmark = true + + // # Setup Model + let cfg = cfhelper.load_config(args) + let custom = Python.import("SiamMask.experiments.siammask_sharp.custom") + var siammask = custom.Custom(anchors: cfg["anchors"]) + siammask = ldhelper.load_pretrain(siammask, args.resume) + + siammask.eval().to(device) + let init_rect = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-20, 40, 70]) + let tup = init_rect.tuple4 + let x = tup.0 + let y = tup.1 + let w = tup.2 + let h = tup.3 + + var state: PythonObject = 0 + var results = [PythonObject]() + + for (f, im) in frames.enumerated() { + + let im_np = im.makeNumpyArray() + let im_3d = np.squeeze(np.stack(Python.tuple([im_np, im_np, im_np]), axis: 2)) + + if f == 0 { // init + let target_pos = np.array([x + w / 2, y + h / 2]) + let target_sz = np.array([w, h]) + state = smtest.siamese_init(im_3d, target_pos, target_sz, siammask, cfg["hp"], device: device) //# init tracker + results.append(Python.tuple([Int(x + w / 2)!, Int(y + h / 2)!])) + } else if f > 0 { //# tracking + state = smtest.siamese_track(state, im_3d, mask_enable: true, refine_enable: true, device: device) //# track + let location = state["ploygon"].flatten() + + + results.append(location) + + + } + + } + + var track = [OrientedBoundingBox]() + for (i, result) in results.enumerated() { + if i > 0 { + let location = result + let centx = Int((location[0]+location[2]+location[4]+location[6])/4)! + let centy = Int((location[1]+location[3]+location[5]+location[7])/4)! + let dx1 = location[0]-location[2] + let dy1 = location[1]-location[3] + let dx2 = location[0]-location[6] + let dy2 = location[1]-location[7] + let dist1 = sqrt(pow(Double(dx1)!, 2) + pow(Double(dy1)!, 2)) + let dist2 = (pow(Double(dx2)!, 2) + pow(Double(dy2)!, 2)).squareRoot() + let locx: Int + let locy: Int + let rows: Int + let cols: Int + if dist1 < dist2 { + locx = Int((location[0]+location[2])/2)! + locy = Int((location[1]+location[3])/2)! + rows = Int(dist1) + cols = Int(dist2) + } else { + locx = Int((location[0]+location[6])/2)! + locy = Int((location[1]+location[7])/2)! 
+ rows = Int(dist2) + cols = Int(dist1) + } + let dx = Double(abs(locx - centx)) + let dy = Double(abs(locy - centy)) + var theta = Double.pi/2 + print("polygon", result) + print("center", centx, centy) + print("dx and dy", dx, dy) + print("theta initial", theta) + if dx != 0 { + theta = atan(dy/dx) + } + + if locx >= centx && locy < centy{ + theta = -theta + } else if locx < centx && locy >= centy{ + theta = .pi - theta + } else if locx < centx && locy < centy{ + theta = .pi + theta + } + print("theta final", theta) + + let rot = Rot2(theta) + let vect = Vector2(Double(centx), Double(centy)) + print("rotation", rot, "\n\n") + let center = Pose2(rot, vect) + let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols) + track.append(swiftBB) + } else { + let swiftBB = start + track.append(swiftBB) + } + } + return track + } + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 20 + var eval_results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "brando03") + print("done evaluating") + var total_overlap = eval_results.sequences.prefix(sequenceCount)[0].subsequences.first!.metrics.overlap + + for (index, value) in eval_results.sequences.prefix(sequenceCount).enumerated() { + + print("done,", index) + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + value.subsequences.map { + //zip($0.prediction, $0.groundTruth).enumerated().map{($0.0, $0.1.0.center, $0.1.1.center)}) + let encoder = JSONEncoder() + let data = try! encoder.encode($0.prediction) + FileManager.default.createFile(atPath: "Results/brando03/prediction_siammask_sequence_\(index).json", contents: data, attributes: nil) + } + value.subsequences.map { + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + + fig.savefig("Results/brando03/brando03_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + print("Accuracy for all sequences is \(eval_results.trackerMetrics.accuracy) with Robustness of \(eval_results.trackerMetrics.robustness)") + + let pickle = Python.import("pickle"); + let f = Python.open("Results/EAO/siammask.data", "wb") + pickle.dump(eval_results.expectedAverageOverlap.curve, f) + + + // var average_overlap = [Double]() + // for (i, val) in total_overlap.enumerated() { + // average_overlap.append(val/Double(sequenceCount)) + // } + // let (fig, ax) = plt.subplots().tuple2 + // ax.plot(average_overlap) + // ax.set_title("Overlap") + // fig.savefig("average_overlap.png") + + + + + + } + +} \ No newline at end of file diff --git a/Scripts/Brando04.swift b/Scripts/Brando04.swift new file mode 100644 index 00000000..d1354f30 --- /dev/null +++ b/Scripts/Brando04.swift @@ -0,0 +1,108 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + + + +/// Brando04: NNClassifier training +struct Brando04: ParsableCommand { + typealias LikelihoodModel = TrackingLikelihoodModel + + + @Flag(help: "Training mode") + var training: Bool = false + + let 
num_boxes: Int = 10000
+
+  // Returns (labels, patches) for background training data; background is
+  // class 0. (The scalar types below are assumptions; the generic parameters
+  // were lost from the original hunk.)
+  func getTrainingDataBG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor<Double>, Tensor<Float>) {
+    print("bg")
+    let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+      $0.frame!.patch(at: $0.obb)
+    }
+    let labels = Tensor<Double>(zeros: [num_boxes])
+    let patches = Tensor<Float>(stacking: bgBoxes)
+    print("background labels and patches done")
+    return (labels, patches)
+  }
+
+  // Returns (labels, patches) for foreground training data; foreground is
+  // class 1, so the labels here are ones (the original zeros would have
+  // collapsed both classes onto the same label).
+  func getTrainingDataFG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor<Double>, Tensor<Float>) {
+    print("fg")
+    let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+      $0.frame!.patch(at: $0.obb)
+    }
+    let labels = Tensor<Double>(ones: [num_boxes])
+    let patches = Tensor<Float>(stacking: fgBoxes)
+    print("foreground labels and patches done")
+    return (labels, patches)
+  }
+
+  // Trains NNClassifier weights on foreground/background patches and saves
+  // them under `classifiers/classifiers_today`, skipping weight files that
+  // already exist.
+  func run() {
+    let folderName = "classifiers/classifiers_today"
+    if !FileManager.default.fileExists(atPath: folderName) {
+      do {
+        try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+      } catch {
+        print(error.localizedDescription)
+      }
+    } else {
+      print("folder exists")
+    }
+
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
+    let (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset)
+    let (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset)
+
+    let patches = Tensor<Float>(stacking: patches_bg.unstacked() + patches_fg.unstacked())
+    let labels = labels_bg.concatenated(with: labels_fg)
+    print("shape of patches", patches.shape)
+    print("shape of labels", labels.shape)
+
+    let kHiddenDimension = 512
+    let featSize = 512
+    let iterations = [5, 6, 7]
+
+    for i in iterations {
+      let path = "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i)_60000boxes_600epochs.npy"
+      if FileManager.default.fileExists(atPath: path) {
+        print("File Already Exists. Abort training")
+        continue
+      }
+      print("Training...")
+      let classifier: PretrainedNNClassifier = PretrainedNNClassifier(
+        patches: patches,
+        labels: labels,
+        given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: "", learningRate: 1e-3),
+        train_mode: "from_scratch"
+      )
+      classifier.save(to: path)
+    }
+  }
+}
diff --git a/Scripts/Brando05.swift b/Scripts/Brando05.swift
new file mode 100644
index 00000000..b8a30e9e
--- /dev/null
+++ b/Scripts/Brando05.swift
@@ -0,0 +1,119 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando05: TRACKING with NN Classifier
+struct Brando05: ParsableCommand {
+  @Option(help: "Run for number of frames")
+  var trackLength: Int = 80
+
+  // Runs the NNClassifier tracker on n sequences and outputs relevant images and statistics
+  func run() {
+    let np = Python.import("numpy")
+    let featureSizes = [256]
+    let kHiddenDimensions = [512]
+    let iterations = [1]
+    let trainingDatasetSize = 100
+
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)!
+    let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
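+    // `evalTracker` below has the closure shape TrackerEvaluationDataset
+    // expects: given a sequence's frames and the first ground-truth box, it
+    // returns one OrientedBoundingBox per frame.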
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + for featureSize in featureSizes { + for kHiddenDimension in kHiddenDimensions { + for j in iterations { + + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + // LOAD THE CLASSIFIER + classifier.load(weights: np.load("./classifiers/classifiers_today/large_classifier_weight_\(kHiddenDimension)_\(featureSize)_\(j).npy", allow_pickle: true)) + + let evalTracker: Tracker = {frames, start in + var tracker = makeProbabilisticTracker2( + model: classifier, + frames: frames, + targetSize: (40, 70) + ) + let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true) + let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols:70) } + return track + + } + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 1 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "classifier") + + + for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { + let folderName = "Results/classifier/classifier_\(kHiddenDimension)_\(featureSize)_\(j)_10000sampling" + print(folderName) + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + try FileManager.default.createDirectory(atPath: folderName + "/sequence0", withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig(folderName + "/sequence\(index)/classifier_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + + fig.savefig(folderName + "/classifier_\(kHiddenDimension)_\(featureSize)_\(j)subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + + + + + } + } + } + + + + } +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. 
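+/// (The `Tuple1` overload is what lets `prediction[unpack($0)]` above pull a
+/// single `Pose2` variable ID out of the tracker's per-frame tuple.)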
+fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} diff --git a/Scripts/Brando06.swift b/Scripts/Brando06.swift new file mode 100644 index 00000000..811469a4 --- /dev/null +++ b/Scripts/Brando06.swift @@ -0,0 +1,180 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + +import PenguinStructures + +// This script produces HISTOGRAMS for the output of NN Classifiers +struct Brando06: ParsableCommand { + + func run() { + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! + let batchSize = 3000 + let fgBoxes = testData.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + let bgBoxes = testData.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let np = Python.import("numpy") + let kHiddenDimensions = [512] + let featSizes = [512] + var plt = Python.import("matplotlib.pyplot") + + + for i in featSizes { + for j in kHiddenDimensions { + for num in 1...1 { + + let featureSize = i + let kHiddenDimension = j + + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + var weightsFile: String? + if let weightsFile = weightsFile { + classifier.load(weights: np.load(weightsFile, allow_pickle: true)) + } else { + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_512_512_1_doubletraining.npy", allow_pickle: true)) + } + + let outfg = classifier.classify(fgpatches) + let outbg = classifier.classify(bgpatches) + let softmaxfg = softmax(outfg) + let softmaxbg = softmax(outbg) + let folderName = "Results/brando06/classified_images" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + for i in 0...30 { + //Background + var (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 + var patch = bgpatches[i,0...,0...,0] + var fr = np.squeeze(patch.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + ax.set_title("background image: \noutput index 0: \(softmaxbg[i][0])\noutput index 1: \(softmaxbg[i][1])") + fig.savefig(folderName + "/bgpatch\(i).png", bbox_inches: "tight") + plt.close("all") + //Foreground + (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 + patch = fgpatches[i,0...,0...,0] + fr = np.squeeze(patch.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + ax.set_title("foreground image: \noutput index 0: \(softmaxfg[i][0])\noutput index 1: \(softmaxfg[i][1])") + fig.savefig(folderName + "/fgpatch\(i).png", bbox_inches: "tight") + plt.close("all") + } + + let shapefg = outfg.shape + let shapebg = outbg.shape + + var fgsum0 = 0.0 + var fgsum1 = 0.0 + var bgsum0 = 0.0 + var bgsum1 = 0.0 + var fg0_arr = [Double]() + var fg1_arr = [Double]() + var bg0_arr = [Double]() + var bg1_arr = [Double]() + for i in 0...batchSize-1 { + fgsum0 += Double(softmaxfg[i,0])! + fgsum1 += Double(softmaxfg[i,1])! + bgsum0 += Double(softmaxbg[i,0])! + bgsum1 += Double(softmaxbg[i,1])! 
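+        // Column 0 is the softmax probability of "background", column 1 of
+        // "foreground"; the sums feed the per-class means and the arrays feed
+        // the histograms plotted below.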
+ fg0_arr.append(Double(softmaxfg[i,0])!) + fg1_arr.append(Double(softmaxfg[i,1])!) + bg0_arr.append(Double(softmaxbg[i,0])!) + bg1_arr.append(Double(softmaxbg[i,1])!) + } + print("featSize", featureSize, "kHiddendimension", kHiddenDimension, "num", num, "val", fgsum1 + bgsum0 - fgsum0 - bgsum1) + + + + + print("feature size", featureSize) + print("fgsum1", fgsum1, "fgsum0", fgsum0) + print("bgsum1", bgsum1, "bgsum0", bgsum0) + + var (figs, axs) = plt.subplots(2,2).tuple2 + print("asda") + + plt.subplots_adjust(left:0.1, + bottom:0.1, + right:0.9, + top:0.9, + wspace:0.4, + hspace:0.4) + + + var ax1 = axs[1,0] + ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50) + var mean = fgsum0/Double(batchSize) + var sd = 0.0 + for elem in fg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,0] + ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = fgsum1/Double(batchSize) + sd = 0.0 + for elem in fg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[1,1] + ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum0/Double(batchSize) + sd = 0.0 + for elem in bg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,1] + + ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum1/Double(batchSize) + sd = 0.0 + for elem in bg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + figs.savefig("hist_softmax_\(kHiddenDimension)_\(featureSize)_\(num).png") + plt.close(figs) + + + + } + } + } + + + + + + + + + + + } +} diff --git a/Scripts/Brando07.swift b/Scripts/Brando07.swift new file mode 100644 index 00000000..e2e55e31 --- /dev/null +++ b/Scripts/Brando07.swift @@ -0,0 +1,172 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando07: RAE + Prob density histograms +struct Brando07: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Size of feature space") + var featureSize: Int = 256 + + @Option(help: "Pretrained weights") + var weightsFile: String? + + func run() { + let np = Python.import("numpy") + let kHiddenDimension = 512 + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + + if let weightsFile = weightsFile { + rae.load(weights: np.load(weightsFile, allow_pickle: true)) + } else { + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + } + print("s") + + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let numberOfTrainingSamples = 3000 + + let dataset = OISTBeeVideo(directory: dataDir, length: 100)! 
// calling this twice caused the Killed to happen + let batchSize = 3000 + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + print("here 1.5") + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + print("tests here2") + let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) + print("patches complete") + + + let batchPositive = rae.encode(fgpatches) + print("shape batch positive", batchPositive.shape) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = rae.encode(bgpatches) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + + var outfg0 = [Double]() + var outfg1 = [Double]() + var outbg0 = [Double]() + var outbg1 = [Double]() + print(batchPositive[0,0...].shape) + print(backgroundModel.probability(batchPositive[0,0...])) + print(foregroundModel.probability(batchPositive[0,0...])) + + for i in 0...numberOfTrainingSamples-1 { + outfg0.append(backgroundModel.probability(batchPositive[i,0...])) + outfg1.append(foregroundModel.probability(batchPositive[i,0...])) + outbg0.append(backgroundModel.probability(batchNegative[i,0...])) + outbg1.append(foregroundModel.probability(batchNegative[i,0...])) + } + + + var plt = Python.import("matplotlib.pyplot") + + + var fgsum0 = 0.0 + var fgsum1 = 0.0 + var bgsum0 = 0.0 + var bgsum1 = 0.0 + var fg0_arr = [Double]() + var fg1_arr = [Double]() + var bg0_arr = [Double]() + var bg1_arr = [Double]() + for i in 0...batchSize-1 { + fgsum0 += (outfg0[i]) + fgsum1 += (outfg1[i]) + bgsum0 += (outbg0[i]) + bgsum1 += (outbg1[i]) + fg0_arr.append((outfg0[i])) + fg1_arr.append((outfg1[i])) + bg0_arr.append((outbg0[i])) + bg1_arr.append((outbg1[i])) + } + print("featSize", featureSize, "kHiddendimension", kHiddenDimension, "val", fgsum1 + bgsum0 - fgsum0 - bgsum1) + + + + + print("feature size", featureSize) + print("fgsum1", fgsum1, "fgsum0", fgsum0) + print("bgsum1", bgsum1, "bgsum0", bgsum0) + + var (figs, axs) = plt.subplots(2,2).tuple2 + print("asda") + + plt.subplots_adjust(left:0.1, + bottom:0.1, + right:0.9, + top:0.9, + wspace:0.4, + hspace:0.4) + + + var ax1 = axs[1,0] + ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50) + var mean = fgsum0/Double(batchSize) + var sd = 0.0 + for elem in fg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,0] + ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = fgsum1/Double(batchSize) + sd = 0.0 + for elem in fg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[1,1] + ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum0/Double(batchSize) + sd = 0.0 + for elem in bg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,1] + + ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum1/Double(batchSize) + sd = 0.0 + for elem in bg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for foreground. 
\n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + figs.savefig("hist_rae_\(kHiddenDimension)_\(featureSize).png") + plt.close(figs) + + + + } + + + + + +} + + diff --git a/Scripts/Brando08.swift b/Scripts/Brando08.swift new file mode 100644 index 00000000..f4d414c1 --- /dev/null +++ b/Scripts/Brando08.swift @@ -0,0 +1,47 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + +import PenguinStructures + +// PRINT IMAGE PATCHES TO VISUALIZE +struct Brando08: ParsableCommand { + + func run() { + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let dataset = OISTBeeVideo(directory: dataDir, length: 100)! + let batchSize = 300 + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let np = Python.import("numpy") + var plt = Python.import("matplotlib.pyplot") + let mpl = Python.import("matplotlib") + + print(fgpatches.shape) + for i in batchSize-100...batchSize-1 { + let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 + let patch = bgpatches[i,0...,0...,0] + let fr = np.squeeze(patch.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + let folderName = "Results/brando08/bgpatches" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + fig.savefig("Results/brando08/bgpatches/patch\(i).png", bbox_inches: "tight") + plt.close("all") + + } + } +} diff --git a/Scripts/Brando10.swift b/Scripts/Brando10.swift new file mode 100644 index 00000000..827f8dcd --- /dev/null +++ b/Scripts/Brando10.swift @@ -0,0 +1,64 @@ + + + +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando10: Plot the samplings in progress. +struct Brando10: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + // Runs NNClassifier tracker on n number of sequences and outputs relevant images and statistics + func run() { + let np = Python.import("numpy") + let featureSizes = [512] + let kHiddenDimensions = [512] + let iterations = [1] + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! 
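+    // The .npy traces read below are assumed to come from an earlier sampling
+    // run that dumped per-frame pose and error arrays, e.g. (hypothetical
+    // producer side):
+    //   np.save(folderName + "/sampling_frame_\(i)_error.npy", np.array(errors))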
+
+    let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+    let plt = Python.import("matplotlib.pyplot")
+    for i in 0...78 {
+      let folderName = "./sampling_512_512_2000samples"
+      // The pose traces are loaded alongside the error trace, though only the
+      // error is plotted below.
+      let posex_np = np.load(folderName + "/sampling_frame_\(i)_posex.npy")
+      let posey_np = np.load(folderName + "/sampling_frame_\(i)_posey.npy")
+      let posetheta_np = np.load(folderName + "/sampling_frame_\(i)_posetheta.npy")
+      let error_np = np.load(folderName + "/sampling_frame_\(i)_error.npy")
+      let t = np.arange(0, 2000, 1)
+
+      // subplots(1, 1) returns a single Axes object, so it is used directly
+      // rather than indexed (the original `axs[0]` would fail at run time).
+      let (figs, ax) = plt.subplots(1, 1, figsize: Python.tuple([10, 4])).tuple2
+      ax.plot(t, error_np, linewidth: 1)
+      ax.set_title("error")
+      plt.subplots_adjust(left: 0.1, bottom: 0.1, right: 0.9, top: 0.9, wspace: 0.4, hspace: 0.4)
+
+      figs.savefig(folderName + "/sampling_figure_\(i).png")
+      plt.close("all")
+    }
+  }
+}
\ No newline at end of file
diff --git a/Scripts/Brando11.swift b/Scripts/Brando11.swift
new file mode 100644
index 00000000..671be2b5
--- /dev/null
+++ b/Scripts/Brando11.swift
@@ -0,0 +1,42 @@
+import ArgumentParser
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import PenguinStructures
+
+/// Brando11: compute the mean displacement between consecutive frames
+struct Brando11: ParsableCommand {
+  @Option(help: "Run for number of frames")
+  var trackLength: Int = 80
+
+  func run() {
+    let np = Python.import("numpy")
+    let plt = Python.import("matplotlib.pyplot")
+    let trainingDatasetSize = 100
+
+    // LOAD THE TEST VIDEO AND ITS GROUND-TRUTH TRACKS
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let data = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+    var dX = [Double]()
+    var dY = [Double]()
+    var dTheta = [Double]()
+    for track in data.tracks {
+      var prevObb: OrientedBoundingBox? = nil
+      for obb in track.boxes {
+        if let prev = prevObb {
+          dX.append(obb.center.t.x - prev.center.t.x)
+          dY.append(obb.center.t.y - prev.center.t.y)
+          dTheta.append(obb.center.rot.theta - prev.center.rot.theta)
+        }
+        // Advance the previous box every frame so each delta is between
+        // consecutive frames (the original only set it once per track).
+        prevObb = obb
+      }
+    }
+    // Plot histogram.
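+    // A minimal sketch of the plotting step left as a TODO above; the bin
+    // count, figure layout, and output path are assumptions, and the printed
+    // means give the "mean displacement" this script is named for:
+    let (figs, axs) = plt.subplots(1, 3, figsize: Python.tuple([12, 4])).tuple2
+    axs[0].hist(dX, bins: 50)
+    axs[0].set_title("dx (pixels)")
+    axs[1].hist(dY, bins: 50)
+    axs[1].set_title("dy (pixels)")
+    axs[2].hist(dTheta, bins: 50)
+    axs[2].set_title("dtheta (radians)")
+    figs.savefig("Results/brando11_displacement_histograms.png", bbox_inches: "tight")
+    plt.close("all")
+    let n = Double(max(dX.count, 1))
+    print("mean dx = \(dX.reduce(0, +) / n), mean dy = \(dY.reduce(0, +) / n), mean dtheta = \(dTheta.reduce(0, +) / n)")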
+ + } +} diff --git a/Scripts/Brando12.swift b/Scripts/Brando12.swift new file mode 100644 index 00000000..fad930f2 --- /dev/null +++ b/Scripts/Brando12.swift @@ -0,0 +1,311 @@ +import ArgumentParser +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import PenguinStructures + +/// Brando12: OPTIMIZATION CONVERGENCE VISUALIZATION [red & green dots] +struct Brando12: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Classifier or rae") + var useClassifier: Bool = true + + func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) { + // CREATE A PLACEHOLDER FOR POSE + var v = VariableAssignments() + let poseId = v.store(p) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - p.t.x + let dy = v[poseId].t.y - p.t.y + let dtheta = v[poseId].rot.theta - p.rot.theta + let startpose = v[poseId] + let fg = FactorGraph() + + return (dx, dy, dtheta, startpose, v, poseId, fg) + } + + func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) { + var conv = true + var errors = [Double]() + var xs = [Double]() + var ys = [Double]() + var thetas = [Double]() + return (conv, errors, xs, ys, thetas) + } + + + + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let trainingDatasetSize = 100 + + // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let frames = testData.frames + let firstTrack = testData.tracks[0] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + + + //OPTIMIZER GRADIENT DESCENT + let lr = 1e-7 + var optimizer = GradientDescent(learningRate: lr) + let it_limit = 200 + + + //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION + let str: String + if useClassifier{ + str = "NNC" + } else { + str = "RAE" + } + let folderName = "Results/GD_optimization_\(str)_lr_\(lr)__3_09_2021_final_images_4subplots" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + + + + + //CREATE A FIG + print("hello1") + let (fig, axs) = plt.subplots(2,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + for i in 0...1 { + for j in 0...1 { + axs[i,j].imshow(fr / 255.0, cmap: "gray") + let firstGroundTruth = firstObb.center + axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50) + axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50) + axs[i,j].get_xaxis().set_visible(false) + axs[i,j].get_yaxis().set_visible(false) + } + } + axs[0,0].set_title("fabs(theta) < 0.1", fontsize:8) + axs[0,1].set_title("fabs(theta) < 0.2", fontsize:8) + axs[1,0].set_title("fabs(theta) < 0.3", fontsize:8) + axs[1,1].set_title("fabs(theta) >= 0.3", fontsize:8) + + print("hello") + let xy_thresh = 20.0 //pixels + let theta_thresh = 0.5 //radians // consider doing overlap. 
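+    // A hypothetical helper, equivalent to the inline bounds checks performed
+    // after each optimization run below; "converged" means landing within
+    // xy_thresh pixels and theta_thresh radians of the label:
+    func withinThreshold(_ pose: Pose2, of label: Pose2) -> Bool {
+      return abs(pose.t.x - label.t.x) <= xy_thresh
+        && abs(pose.t.y - label.t.y) <= xy_thresh
+        && abs(pose.rot.theta - label.rot.theta) <= theta_thresh
+    }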
+ + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + + + if useClassifier { + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true)) + + for j in 0...200 { + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // CREATE THE FACTOR AND FACTOR GRAPH + let factorNNC = ProbablisticTrackingFactor2(poseId, + measurement: firstFrame, + classifier: classifier, + patchSize: (40, 70), + appearanceModelSize: (40, 70) + ) + fg.store(factorNNC) + + + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization") + for i in 0.. firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } + + } else { + + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + + final_err = factorNNC.errorVector(v[poseId]).x + label_err = factorNNC.errorVector(firstObb.center).x + start_err = factorNNC.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + + + + } + + + + + + + } else { + // LOAD RAE AND TRAIN BG AND FG MODELS + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + let (fg, bg, _) = getTrainingBatches( + dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000, + fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true + ) + let batchPositive = rae.encode(fg) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = rae.encode(bg) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + + for j in 0...200 { + + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // CREATE THE FACTOR AND FACTOR GRAPH + let factorRAE = ProbablisticTrackingFactor(poseId, + measurement: firstFrame, + encoder: rae, + patchSize: (40, 70), + appearanceModelSize: (40, 70), + foregroundModel: foregroundModel, + backgroundModel: backgroundModel, + maxPossibleNegativity: 1e7 + ) + fg.store(factorRAE) + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization") + for i in 0.. 
firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } + + } else { + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + final_err = factorRAE.errorVector(v[poseId]).x + label_err = factorRAE.errorVector(firstObb.center).x + start_err = factorRAE.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + } + } + } +} diff --git a/Scripts/Brando13.swift b/Scripts/Brando13.swift new file mode 100644 index 00000000..8678c7cb --- /dev/null +++ b/Scripts/Brando13.swift @@ -0,0 +1,421 @@ +import ArgumentParser +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import PenguinStructures + +/// Brando13: OPTIMIZATION CONVERGENCE VISUALIZATION with LM +struct Brando13: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Classifier or rae") + var useClassifier: Bool = false + + func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) { + // CREATE A PLACEHOLDER FOR POSE + var v = VariableAssignments() + let poseId = v.store(p) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - p.t.x + let dy = v[poseId].t.y - p.t.y + let dtheta = v[poseId].rot.theta - p.rot.theta + let startpose = v[poseId] + let fg = FactorGraph() + + return (dx, dy, dtheta, startpose, v, poseId, fg) + } + + func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) { + var conv = true + var errors = [Double]() + var xs = [Double]() + var ys = [Double]() + var thetas = [Double]() + return (conv, errors, xs, ys, thetas) + } + + + + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let trainingDatasetSize = 100 + + // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let frames = testData.frames + let firstTrack = testData.tracks[0] + // let firstTrack = testData.tracks[5] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + // let firstObb = firstTrack.boxes[5] + + + //OPTIMIZER GRADIENT DESCENT + // let lr = 1e-7 + // var optimizer = GradientDescent(learningRate: lr) + let it_limit = 200 + /// The set of steps taken. + var step: Int = 0 + + /// Desired precision, TODO(fan): make this actually work + var precision: Double = 1e-10 + + /// The precision of the CGLS solver. 
+ var cgls_precision: Double = 1e-10 + + /// Maximum number of L-M iterations + var max_iteration: Int = 50 + + /// Maximum number of G-N iterations + var max_inner_iteration: Int = 400 + + /// Maximam Lambda + var max_lambda: Double = 1e32 + + /// Minimum Lambda + var min_lambda: Double = 1e-16 + + /// Initial Lambda + // var initial_lambda: Double = 1e-4 + var initial_lambda: Double = 1e7 + + /// Lambda Factor + var lambda_factor: Double = 2 + + + //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION + let str: String + if useClassifier{ + str = "NNC" + } else { + str = "RAE" + } + let folderName = "Results/LM_optimization_\(str)__17_09_2021_final_images_4subplots" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + + + + + //CREATE A FIG + print("hello1") + let (fig, axs) = plt.subplots(2,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + for i in 0...1 { + for j in 0...1 { + axs[i,j].imshow(fr / 255.0, cmap: "gray") + let firstGroundTruth = firstObb.center + // axs[i,j].plot(firstObb.corners.map{$0.x} + [firstObb.corners.first!.x], firstObb.corners.map{$0.y} + [firstObb.corners.first!.y], "b-") + axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50) + axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50) + axs[i,j].get_xaxis().set_visible(false) + axs[i,j].get_yaxis().set_visible(false) + } + } + axs[0,0].set_title("fabs(theta) < 5deg", fontsize:8) + axs[0,1].set_title("fabs(theta) < 10deg", fontsize:8) + axs[1,0].set_title("fabs(theta) < 25deg", fontsize:8) + axs[1,1].set_title("fabs(theta) >= 25deg", fontsize:8) + + print("hello") + let xy_thresh = 20.0 //pixels + let theta_thresh = 0.5 //radians // consider doing overlap. + + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + + + if useClassifier { + print("using classifier") + // var classifier = NNClassifier( + // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + // ) + // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true)) + + // for j in 0...200 { + // // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + // var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // // CREATE THE FACTOR AND FACTOR GRAPH + // let factorNNC = ProbablisticTrackingFactor2(poseId, + // measurement: firstFrame, + // classifier: classifier, + // patchSize: (40, 70), + // appearanceModelSize: (40, 70) + // ) + // fg.store(factorNNC) + // print(firstObb.center) + + + // // PERFORM GRADIENT DESCENT + // var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + // print("starting optimization") + // for i in 0.. 
firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + // let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + // let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + // if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + // // plot a green dot + // // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") + // // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + // if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + // axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + // axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + // axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } else { + // axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } + + // } else { + // // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") + // // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + // if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + // axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + // axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + // axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } else { + // axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } + // } + // let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + // var final_err: Double + // var label_err: Double + // var start_err: Double + + + // final_err = factorNNC.errorVector(v[poseId]).x + // label_err = factorNNC.errorVector(firstObb.center).x + // start_err = factorNNC.errorVector(startpose).x + + // axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + // + "\n label err = \(label_err).x)" + // + "\n start err = \(start_err)" + // + "\n learning rate = \(lr)" + // + "\n converged = \(conv)") + // figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + // // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) + // // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") + // plt.close("all") + // fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + + + + // } + + + + + + + } else { + // LOAD RAE AND TRAIN BG AND FG MODELS + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + let (fg, bg, _) = getTrainingBatches( + dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000, + fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true + ) + let batchPositive = rae.encode(fg) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = rae.encode(bg) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + + for j in 0...200 { + + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // CREATE THE FACTOR AND FACTOR GRAPH + let factorRAE = ProbablisticTrackingFactor(poseId, + measurement: firstFrame, + encoder: rae, + patchSize: (40, 70), + appearanceModelSize: (40, 70), + foregroundModel: foregroundModel, + backgroundModel: backgroundModel, + maxPossibleNegativity: 1e7 + ) + fg.store(factorRAE) + print(firstObb.center) + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization LM") + var old_error = fg.linearizableError(at: v) + + var lambda: Double = initial_lambda + var inner_iter_step = 0 + var inner_success = false + var all_done = false + var i = 0 + precision = 1e-10 + max_iteration = 50 + step = 0 + for i in 0.. 
.ulpOfOne && model_fidelity > 0.01 { + old_error = this_error + + // Success, decrease lambda + if lambda > min_lambda { + lambda = lambda / lambda_factor + } + + inner_success = true + } else { + + // increase lambda and retry + v = oldval + if lambda > max_lambda { + print("OOOOOOOHHHHHH SHIT!") + break + } + lambda = lambda * lambda_factor + } + + if model_fidelity > 0.5 && delta_error < precision || this_error < precision { + inner_success = true + all_done = true + break + } + + inner_iter_step += 1 + if inner_success { + break + } + } + + step += 1 + + if all_done { + break + } + if i == max_iteration-1 { + conv = false + } + } + + + // PLOT THE FINAL OPTIMIZATION RESULT + let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + // plot a green dot + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } + + } else { + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + final_err = factorRAE.errorVector(v[poseId]).x + label_err = factorRAE.errorVector(firstObb.center).x + start_err = factorRAE.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) + // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + } + } + } +} \ No newline at end of file diff --git a/Scripts/Brando14.swift b/Scripts/Brando14.swift new file mode 100644 index 00000000..70d64f2b --- /dev/null +++ b/Scripts/Brando14.swift @@ -0,0 +1,199 @@ +import ArgumentParser +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import PenguinStructures + +/// Brando14: ERRORVALUE over entire image +struct Brando14: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Classifier or rae") + var useClassifier: Bool = true + + + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let trainingDatasetSize = 100 + + // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let frames = testData.frames + let firstTrack = testData.tracks[0] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + + let range = 100.0 + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 512 + let kHiddenDimension = 512 + + + //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION + let str: String + if useClassifier{ + str = "NNC" + } else { + str = "RAE" + } + let lr = 1e-6 + let folderName = "Results/ErrorValueVizualized_\(str)_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + + + let firstGroundTruth = firstObb.center + print("oBB coordinates", firstGroundTruth.t.x, firstGroundTruth.t.y) + + //CREATE A FIG + print("hello1") + let (fig, axs) = plt.subplots(1,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + axs[0].imshow(fr / 255.0, cmap: "gray") + + + axs[0].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2) + axs[0].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2) + axs[1].set_xlim(0, range) + axs[1].set_ylim(0, range) + + let x = firstGroundTruth.t.x + let y = firstGroundTruth.t.y + + + + + + var values = Tensor(zeros: [Int(range), Int(range)]) + print("printing tensor",values) + + if useClassifier { + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_512_512_1_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy", allow_pickle: true)) + + print("done loading") + for i in 0...Int(range)-1 { + for j in 0...Int(range)-1 { + let t = Vector2(x-range/2+Double(i), 
+          let p = Pose2(firstGroundTruth.rot, t)
+          var v = VariableAssignments()
+          let poseId = v.store(p)
+          var fg = FactorGraph()
+          // CREATE THE FACTOR AND FACTOR GRAPH
+          let factorNNC = ProbablisticTrackingFactor2(poseId,
+            measurement: firstFrame,
+            classifier: classifier,
+            patchSize: (40, 70),
+            appearanceModelSize: (40, 70)
+          )
+          fg.store(factorNNC)
+          values[i,j] = Tensor(factorNNC.errorVector(v[poseId]).x)
+        }
+        print("row", i)
+      }
+      // Shift the error values to be non-negative, then rescale to [0, 255] for display.
+      let min_val = values.min()
+      if Double(min_val)! < 0 {
+        values = values - min_val
+      }
+      values = values / values.max() * 255
+      print(values[0..., 0])
+      print(values.shape)
+      axs[1].imshow(values.makeNumpyArray())
+      fig.savefig(folderName + "/vizual_NNC.png", bbox_inches: "tight")
+    } else {
+      print("RAE")
+      // LOAD RAE AND TRAIN BG AND FG MODELS
+      var rae = DenseRAE(
+        imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
+        hiddenDimension: kHiddenDimension, latentDimension: featureSize
+      )
+      rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true))
+      let (fg, bg, _) = getTrainingBatches(
+        dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000,
+        fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true
+      )
+      let batchPositive = rae.encode(fg)
+      let foregroundModel = MultivariateGaussian(from: batchPositive, regularizer: 1e-3)
+      let batchNegative = rae.encode(bg)
+      let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3)
+      for i in 0..<Int(range) {
+        for j in 0..<Int(range) {
+          let t = Vector2(x - range/2 + Double(i), y - range/2 + Double(j))
+          let p = Pose2(firstGroundTruth.rot, t)
+          var v = VariableAssignments()
+          let poseId = v.store(p)
+          var fg = FactorGraph()
+          // CREATE THE FACTOR AND FACTOR GRAPH
+          let factorRAE = ProbablisticTrackingFactor(poseId,
+            measurement: firstFrame,
+            encoder: rae,
+            patchSize: (40, 70),
+            appearanceModelSize: (40, 70),
+            foregroundModel: foregroundModel,
+            backgroundModel: backgroundModel,
+            maxPossibleNegativity: 1e7
+          )
+          fg.store(factorRAE)
+          values[i,j] = Tensor(factorRAE.errorVector(v[poseId]).x)
+        }
+        print("row", i)
+      }
+      print(values[0..., 0])
+      let min_val = values.min()
+      if Double(min_val)! < 0 {
+        values = values - min_val
+      }
+      values = values / values.max() * 255
+      print(values[0..., 0])
+      print(values.shape)
+      axs[1].imshow(values.makeNumpyArray())
+
+      fig.savefig(folderName + "/vizual_RAE.png", bbox_inches: "tight")
+    }
+  }
+}
\ No newline at end of file
diff --git a/Scripts/Brando15.swift b/Scripts/Brando15.swift
new file mode 100644
index 00000000..59b9ccf9
--- /dev/null
+++ b/Scripts/Brando15.swift
@@ -0,0 +1,91 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+/// Brando15: SAVE PATCHES FOR LATER USE
+struct Brando15: ParsableCommand {
+  typealias LikelihoodModel = TrackingLikelihoodModel
+
+  @Flag(help: "Training mode")
+  var training: Bool = false
+
+  let num_boxes: Int = 10000
+
+  func getTrainingDataBG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor<Double>, Tensor<Double>) {
+    print("bg")
+    let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+      $0.frame!.patch(at: $0.obb)
+    }
+    print("bg2")
+    let labels = Tensor<Double>(zeros: [num_boxes])
+    print("labels done bg")
+    let patches = Tensor(stacking: bgBoxes)
+    print("patches done bg")
+    return (labels, patches)
+  }
+
+  func getTrainingDataFG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor<Double>, Tensor<Double>) {
+    print("fg")
+    let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+      $0.frame!.patch(at: $0.obb)
+    }
+    print("fg2")
+    let labels = Tensor<Double>(ones: [num_boxes])
+    print("labels done fg")
+    let patches = Tensor(stacking: fgBoxes)
+    print("patches done fg")
+    return (labels, patches)
+  }
+
+  // Extracts background and foreground patches from the training video and saves
+  // them as .npy files so that classifier training runs don't have to recompute them.
+  func run() {
+    let folderName = "classifiers/classifiers_today"
+    if !FileManager.default.fileExists(atPath: folderName) {
+      do {
+        try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+      } catch {
+        print(error.localizedDescription)
+      }
+    } else {
+      print("folder exists")
+    }
+
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
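+    // Editor's sketch (not in the original script): the .npy files written below can
+    // be reloaded later roughly like this. Labels are never written to disk, so they
+    // must be reconstructed from the layout: the first num_boxes patches are
+    // background (label 0), the remaining num_boxes are foreground (label 1).
+    //   let np = Python.import("numpy")
+    //   let patches = Tensor<Double>(numpy: np.load("Patches_bg_fg_10000.npy"))!
+    //   let labels = Tensor(concatenate(Tensor<Double>(zeros: [10000]),
+    //                                   Tensor<Double>(ones: [10000])))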
+    let (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset)
+    let np = Python.import("numpy")
+    np.save("Patches_bg_\(num_boxes).npy", patches_bg.makeNumpyArray())
+    let (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset)
+
+    let patches = Tensor(stacking: patches_bg.unstacked() + patches_fg.unstacked())
+    let labels = Tensor(concatenate(labels_bg, labels_fg))
+    print("shape of patches", patches.shape)
+    print("shape of labels", labels.shape)
+    // Note: only the patches are saved; the labels are implicit in the ordering
+    // (background patches first, then foreground patches).
+    np.save("Patches_bg_fg_\(num_boxes).npy", patches.makeNumpyArray())
+  }
+}
diff --git a/Scripts/Brando16.swift b/Scripts/Brando16.swift
new file mode 100644
index 00000000..43343228
--- /dev/null
+++ b/Scripts/Brando16.swift
@@ -0,0 +1,126 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando16: PPCA tracker tests over several feature dimensions
+struct Brando16: ParsableCommand {
+  typealias LikelihoodModel = TrackingLikelihoodModel
+
+  @Option(help: "Run for number of frames")
+  var trackLength: Int = 80
+
+  func getTrainingData(
+    from dataset: OISTBeeVideo,
+    numberForeground: Int = 4500
+  ) -> [LikelihoodModel.Datum] {
+    let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
+      (frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb)
+    }
+    return fgBoxes
+  }
+
+  // Runs a PPCA tracker on a number of sequences and outputs relevant images and statistics.
+  // Make sure you have `Results/ppca_<dim>/sequence<n>` and `Results/ppca_<dim>/EAO`
+  // folders before running.
+  func run() {
+    let np = Python.import("numpy")
+    let pickle = Python.import("pickle")
+
+    let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+
+    // Feature dimensions to sweep over (used to be a single size of 512).
+    let hiddenDimensions = [16, 64, 256]
+    for dim in hiddenDimensions {
+      let dataDir = URL(fileURLWithPath: "./OIST_Data")
+
+      let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
+
+      let trainingData = Tensor(stacking: getTrainingData(from: trainingDataset).map { $0.frame!.patch(at: $0.obb) })
+      let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: trackLength)!
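+      // Editor's note (sketch): the mean and standard deviation hardcoded below are
+      // dataset-wide OIST grayscale statistics. They could equivalently be recomputed
+      // from the training patches at the cost of an extra pass over the data:
+      //   var statistics = FrameStatistics(Tensor(0.0))
+      //   statistics.mean = trainingData.mean()
+      //   statistics.standardDeviation = trainingData.standardDeviation()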
+      var statistics = FrameStatistics(Tensor(0.0))
+      statistics.mean = Tensor(62.26806976644069)
+      statistics.standardDeviation = Tensor(37.44683834503672)
+      let trainingBatch = trainingDataset.makeBatch(statistics: statistics, appearanceModelSize: (imageHeight, imageWidth), batchSize: 4500)
+      let pcaEncoder = PCAEncoder(from: trainingBatch, given: dim)
+
+      let trackerEvaluation = TrackerEvaluationDataset(testData)
+      let evalTracker: Tracker = { frames, start in
+        var tracker = trainProbabilisticTracker(
+          trainingData: trainingDataset,
+          encoder: pcaEncoder,
+          frames: frames,
+          boundingBoxSize: (40, 70),
+          withFeatureSize: dim,
+          fgRandomFrameCount: 100,
+          bgRandomFrameCount: 100
+        )
+        let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true)
+        let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols: 70) }
+
+        return track
+      }
+      let plt = Python.import("matplotlib.pyplot")
+      let sequenceCount = 19
+      let results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01")
+
+      for (index, value) in results.sequences.prefix(sequenceCount).enumerated() {
+        var i: Int = 0
+        zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).forEach {
+          let (fig, _) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
+          fig.savefig("Results/ppca_\(dim)/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight")
+          plt.close("all")
+          i += 1
+        }
+
+        let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
+        fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).")
+
+        value.subsequences.forEach {
+          let encoder = JSONEncoder()
+          let data = try! encoder.encode($0.prediction)
+          FileManager.default.createFile(atPath: "Results/ppca_\(dim)/prediction_ppca_\(dim)_sequence_\(index).json", contents: data, attributes: nil)
+          plotPoseDifference(
+            track: $0.prediction.map { $0.center }, withGroundTruth: $0.groundTruth.map { $0.center }, on: axes[0]
+          )
+        }
+        plotOverlap(
+          metrics: value.subsequences.first!.metrics, on: axes[1]
+        )
+        fig.savefig("Results/ppca_\(dim)/andrew01_subsequence\(index).png", bbox_inches: "tight")
+        print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)")
+      }
+
+      print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)")
+      let f = Python.open("Results/ppca_\(dim)/EAO/rp_\(dim).data", "wb")
+      pickle.dump(results.expectedAverageOverlap.curve, f)
+      f.close()
+    }
+  }
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> A {
+  return t.head
+}
\ No newline at end of file
diff --git a/Scripts/Fan03.swift b/Scripts/Fan03.swift
index d153b95b..ce2c1aa8 100644
--- a/Scripts/Fan03.swift
+++ b/Scripts/Fan03.swift
@@ -27,7 +27,7 @@ struct Fan03: ParsableCommand {
     let rp = RandomProjection(fromShape: TensorShape([imageHeight, imageWidth, imageChannels]), toFeatureSize: featureSize)
 
-    let (fig, _, _) = runProbabilisticTracker(
+    let (fig, track, gt) = runProbabilisticTracker(
       directory: dataDir,
       encoder: rp,
       onTrack: trackId, forFrames: trackLength, withSampling: true,
@@ -37,5 +37,17 @@ struct Fan03: ParsableCommand {
 
     /// Actual track v.s. ground truth track
     fig.savefig("Results/fan03/fan03_track\(trackId)_\(featureSize).pdf", bbox_inches: "tight")
+
+    let json = JSONEncoder()
+    json.outputFormatting = .prettyPrinted
+    let track_data = try! json.encode(track)
+    try! track_data.write(to: URL(fileURLWithPath: "Results/fan04/fan04_track_\(trackId)_\(featureSize).json"))
+
+    let gt_data = try! json.encode(gt)
+    try! gt_data.write(to: URL(fileURLWithPath: "Results/fan04/fan04_gt_\(trackId)_\(featureSize).json"))
+
   }
 }
diff --git a/Scripts/Fan05.swift b/Scripts/Fan05.swift
index ff43e4c2..1082d207 100644
--- a/Scripts/Fan05.swift
+++ b/Scripts/Fan05.swift
@@ -31,7 +31,6 @@ struct Fan05: ParsableCommand {
     let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
       (frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb)
     }
-
     return fgBoxes + bgBoxes
   }
 
diff --git a/Scripts/Fan12.swift b/Scripts/Fan12.swift
index cf4649b7..ecaaca94 100644
--- a/Scripts/Fan12.swift
+++ b/Scripts/Fan12.swift
@@ -31,7 +31,7 @@ struct Fan12: ParsableCommand {
   // Just runs an RP tracker and saves image to file
   // Make sure you have a folder `Results/fan12` before running
   func run() {
-    let kHiddenDimension = 100
+    let kHiddenDimension = 512
     let dataDir = URL(fileURLWithPath: "./OIST_Data")
 
     let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
diff --git a/Scripts/main.swift b/Scripts/main.swift
index 9b96f9c1..c8337fea 100644
--- a/Scripts/main.swift
+++ b/Scripts/main.swift
@@ -17,7 +17,11 @@ import PenguinParallelWithFoundation
 
 struct Scripts: ParsableCommand {
   static var configuration = CommandConfiguration(
-    subcommands: [Andrew01.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self,
+    subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self,
+      Brando05.self, Brando06.self, Brando07.self, Brando08.self,
+      Brando10.self, Brando11.self, Brando12.self, Brando13.self, Brando14.self, Brando15.self, Brando16.self, Andrew01.self,
+      Andrew05.self, Andrew06.self, Andrew07.self, Andrew08.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self,
+      Fan10.self, Fan12.self, Fan13.self, Fan14.self,
       Frank01.self, Frank02.self, Frank03.self, Frank04.self])
 }
 
diff --git a/Sources/BeeDataset/Visualization.swift b/Sources/BeeDataset/Visualization.swift
index 6558c08b..78975fb9 100644
--- a/Sources/BeeDataset/Visualization.swift
+++ b/Sources/BeeDataset/Visualization.swift
@@ -1,76 +1,76 @@
-// Copyright 2020 The SwiftFusion Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// // Copyright 2020 The SwiftFusion Authors. All Rights Reserved. +// // +// // Licensed under the Apache License, Version 2.0 (the "License"); +// // you may not use this file except in compliance with the License. +// // You may obtain a copy of the License at +// // +// // http://www.apache.org/licenses/LICENSE-2.0 +// // +// // Unless required by applicable law or agreed to in writing, software +// // distributed under the License is distributed on an "AS IS" BASIS, +// // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// // See the License for the specific language governing permissions and +// // limitations under the License. -import SwiftFusion -import TensorFlow -import Plotly -import ModelSupport -import Foundation +// import SwiftFusion +// import TensorFlow +// // import Plotly +// import ModelSupport +// import Foundation -/// Creates a Plotly figure that displays `frame`, with optional `boxes` overlaid on -/// them. -public func plot( - _ frame: Tensor, boxes: [(name: String, OrientedBoundingBox)] = [], - margin: Double = 30, scale: Double = 1 -) -> Plotly.Figure { - let rows = Double(frame.shape[0]) - let cols = Double(frame.shape[1]) +// /// Creates a Plotly figure that displays `frame`, with optional `boxes` overlaid on +// /// them. +// public func plot( +// _ frame: Tensor, boxes: [(name: String, OrientedBoundingBox)] = [], +// margin: Double = 30, scale: Double = 1 +// ) -> Plotly.Figure { +// let rows = Double(frame.shape[0]) +// let cols = Double(frame.shape[1]) - // Axis settings: - // - no grid - // - range is the image size - // - scale is anchored, to preserve image aspect ratio - // - y axis reversed so that everything is in "(u, v)" coordinates - let xAx = Layout.XAxis(range: [0, InfoArray(cols)], showGrid: false) - let yAx = Layout.YAxis( - autoRange: .reversed, range: [0, InfoArray(rows)], scaleAnchor: .xAxis(xAx), showGrid: false) +// // Axis settings: +// // - no grid +// // - range is the image size +// // - scale is anchored, to preserve image aspect ratio +// // - y axis reversed so that everything is in "(u, v)" coordinates +// let xAx = Layout.XAxis(range: [0, InfoArray(cols)], showGrid: false) +// let yAx = Layout.YAxis( +// autoRange: .reversed, range: [0, InfoArray(rows)], scaleAnchor: .xAxis(xAx), showGrid: false) - let tmpPath = URL(fileURLWithPath: "tmpForPlotlyDisplay.png") - ModelSupport.Image(Tensor(frame)).save(to: tmpPath) - let imageData = try! "data:image/png;base64," + Data(contentsOf: tmpPath).base64EncodedString() +// let tmpPath = URL(fileURLWithPath: "tmpForPlotlyDisplay.png") +// ModelSupport.Image(Tensor(frame)).save(to: tmpPath) +// let imageData = try! "data:image/png;base64," + Data(contentsOf: tmpPath).base64EncodedString() - return Figure( - data: [ - // Dummy data because Plotly is confused when there is no data. 
- Scatter( - x: [0, cols], y: [0, rows], - mode: .markers, marker: Shared.GradientMarker(opacity: 0), - xAxis: xAx, yAxis: yAx - ) - ] + boxes.map { box in - Scatter( - name: box.name, - x: box.1.corners.map { $0.x }, - y: box.1.corners.map { $0.y }, - xAxis: xAx, - yAxis: yAx - ) - }, - layout: Layout( - width: cols * scale + 2 * margin, - height: rows * scale + 2 * margin, - margin: Layout.Margin(l: margin, r: margin, t: margin, b: margin), - images: [ - Layout.Image( - visible: true, - source: imageData, - layer: .below, - xSize: cols, ySize: rows, - sizing: .stretch, - x: 0, y: 0, xReference: .xAxis(xAx), yReference: .yAxis(yAx) - ) - ] - ) - ) -} \ No newline at end of file +// return Figure( +// data: [ +// // Dummy data because Plotly is confused when there is no data. +// Scatter( +// x: [0, cols], y: [0, rows], +// mode: .markers, marker: Shared.GradientMarker(opacity: 0), +// xAxis: xAx, yAxis: yAx +// ) +// ] + boxes.map { box in +// Scatter( +// name: box.name, +// x: box.1.corners.map { $0.x }, +// y: box.1.corners.map { $0.y }, +// xAxis: xAx, +// yAxis: yAx +// ) +// }, +// layout: Layout( +// width: cols * scale + 2 * margin, +// height: rows * scale + 2 * margin, +// margin: Layout.Margin(l: margin, r: margin, t: margin, b: margin), +// images: [ +// Layout.Image( +// visible: true, +// source: imageData, +// layer: .below, +// xSize: cols, ySize: rows, +// sizing: .stretch, +// x: 0, y: 0, xReference: .xAxis(xAx), yReference: .yAxis(yAx) +// ) +// ] +// ) +// ) +// } \ No newline at end of file diff --git a/Sources/BeeTracking/AppearanceRAE+Serialization.swift b/Sources/BeeTracking/AppearanceRAE+Serialization.swift index 6c0d5f05..dc3116d6 100644 --- a/Sources/BeeTracking/AppearanceRAE+Serialization.swift +++ b/Sources/BeeTracking/AppearanceRAE+Serialization.swift @@ -71,4 +71,26 @@ extension DenseRAE { self.decoder_conv1.numpyWeights ].reduce([], +) } + } + + +extension NNClassifier { + /// Loads model weights from the numpy arrays in `weights`. + public mutating func load(weights: PythonObject) { + self.encoder_conv1.load(weights: weights[0..<2]) + self.encoder1.load(weights: weights[2..<4]) + self.encoder2.load(weights: weights[4..<6]) + self.encoder3.load(weights: weights[6..<8]) + } + + /// The model weights as numpy arrays. + public var numpyWeights: PythonObject { + [ + self.encoder_conv1.numpyWeights, + self.encoder1.numpyWeights, + self.encoder2.numpyWeights, + self.encoder3.numpyWeights + ].reduce([], +) + } +} \ No newline at end of file diff --git a/Sources/BeeTracking/BigTransfer.swift b/Sources/BeeTracking/BigTransfer.swift new file mode 100644 index 00000000..d1f57b56 --- /dev/null +++ b/Sources/BeeTracking/BigTransfer.swift @@ -0,0 +1,482 @@ +// Copyright 2020 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Original source: +// "Big Transfer (BiT): General Visual Representation Learning" +// Alexander Kolesnikov, Lucas Beyer, Xiaohua Zhai, Joan Puigcerver, Jessica Yung, Sylvain Gelly, Neil Houlsby. 
+// https://arxiv.org/abs/1912.11370
+
+import Foundation
+import TensorFlow
+import PythonKit
+import BeeDataset
+import SwiftFusion
+
+let subprocess = Python.import("subprocess")
+let np = Python.import("numpy")
+
+/// Convenience wrapper used to load all of the trained layers from the .npz file downloaded from the
+/// BigTransfer weights repository.
+struct BigTransferNamedLayer {
+  let name: String
+  var layer: Tensor<Float>
+}
+
+/// Returns the BiT-HyperRule training resolution for the given original resolution.
+func getResolution(originalResolution: (Int, Int)) -> (Int, Int) {
+  let area = originalResolution.0 * originalResolution.1
+  return area < 96*96 ? (160, 128) : (512, 480)
+}
+
+/// Get the necessary padding to maintain the network size specified in the BigTransfer architecture
+///
+/// - Parameters:
+///   - kernelSize: size n which represents the height and width of the nxn kernel
+/// - Returns: the left / top padding and the right / bottom padding necessary to maintain correct output sizes
+///   after convolution
+func paddingFromKernelSize(kernelSize: Int) -> [(before: Int, after: Int)] {
+  let padTotal = kernelSize - 1
+  let padBeginning = Int(padTotal / 2)
+  let padEnd = padTotal - padBeginning
+  let padding = [
+    (before: 0, after: 0),
+    (before: padBeginning, after: padEnd),
+    (before: padBeginning, after: padEnd),
+    (before: 0, after: 0)]
+  return padding
+}
+
+/// Get all of the pre-trained layers from the .npz file into a Swift array to load into the BigTransfer model
+///
+/// - Parameters:
+///   - modelName: model name that represents the weights to load from the BigTransfer weights repository
+///     ("BiT-M-R50x1" for example)
+/// - Returns: an array of layers and their associated name in the .npz file downloaded from the weights repository
+func getPretrainedWeightsDict(modelName: String) -> [BigTransferNamedLayer] {
+  let validTypes = ["BiT-S", "BiT-M"]
+  let validSizes = [(50, 1), (50, 3), (101, 1), (101, 3), (152, 4)]
+  let bitURL = "https://storage.googleapis.com/bit_models/"
+  var knownModels = [String: String]()
+
+  for types in validTypes {
+    for sizes in validSizes {
+      let modelString = types + "-R" + String(sizes.0) + "x" + String(sizes.1)
+      knownModels[modelString] = bitURL + modelString + ".npz"
+    }
+  }
+
+  if let modelPath = knownModels[modelName] {
+    subprocess.call("wget " + modelPath + " .", shell: true)
+  }
+
+  let weights = np.load("./" + modelName + ".npz")
+
+  var weightsArray = [BigTransferNamedLayer]()
+  for param in weights {
+    weightsArray.append(BigTransferNamedLayer(name: String(param)!, layer: Tensor<Float>(numpy: weights[param])!))
+  }
+  return weightsArray
+}
+
+/// A 2D Convolution layer that standardizes the weights before the forward pass.
This has been implemented in +/// accordance with the implementation in https://github.com/google-research/big_transfer/blob/49afe42338b62af9fbe18f0258197a33ee578a6b/bit_pytorch/models.py#L25 +public struct StandardizedConv2D: Layer { + public var conv: Conv2D + + public init( + filterShape: (Int, Int, Int, Int), + strides: (Int, Int) = (1, 1), + padding: Padding = .valid, + useBias: Bool = true + ) + { + self.conv = Conv2D( + filterShape: filterShape, + strides: strides, + padding: padding, + useBias: useBias) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + let axes: Array = [0, 1, 2] + var standardizedConv = conv + standardizedConv.filter = (standardizedConv.filter - standardizedConv.filter.mean(squeezingAxes: axes)) / sqrt((standardizedConv.filter.variance(squeezingAxes: axes) + 1e-16)) + return standardizedConv(input) + } + +} + +/// A standardized convolution and group norm layer as specified in the BigTransfer architecture +public struct ConvGNV2BiT: Layer { + public var conv: StandardizedConv2D + public var norm: GroupNorm + @noDerivative public var isSecond: Bool + + public init( + inFilters: Int, + outFilters: Int, + kernelSize: Int = 1, + stride: Int = 1, + padding: Padding = .valid, + isSecond: Bool = false + ) { + self.conv = StandardizedConv2D( + filterShape: (kernelSize, kernelSize, inFilters, outFilters), + strides: (stride, stride), + padding: padding, + useBias: false) + self.norm = GroupNorm( + offset: Tensor(zeros: [inFilters]), + scale: Tensor(zeros: [inFilters]), + groupCount: 2, + axis: -1, + epsilon: 0.001) + self.isSecond = isSecond + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + var normResult = norm(input) + if self.isSecond { + normResult = normResult.padded(forSizes: paddingFromKernelSize(kernelSize: 3)) + } + let reluResult = relu(normResult) + let convResult = conv(reluResult) + return convResult + } +} + +/// The shortcut in a residual block with standardized convolution and group normalization +public struct ShortcutBiT: Layer { + public var projection: StandardizedConv2D + public var norm: GroupNorm + @noDerivative public let needsProjection: Bool + + public init(inFilters: Int, outFilters: Int, stride: Int) { + needsProjection = (stride > 1 || inFilters != outFilters) + norm = GroupNorm( + offset: Tensor(zeros: [needsProjection ? inFilters : 1]), + scale: Tensor(zeros: [needsProjection ? inFilters : 1]), + groupCount: needsProjection ? 2 : 1, + axis: -1, + epsilon: 0.001) + + projection = StandardizedConv2D( + filterShape: (1, 1, needsProjection ? inFilters : 1, needsProjection ? 
outFilters : 1), + strides: (stride, stride), + padding: .valid, + useBias: false) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + var res = input + if needsProjection { + res = norm(res) + res = relu(res) + res = projection(res) + } + return res + } +} + +/// Residual block for BigTransfer with standardized convolution and group normalization layers +public struct ResidualBlockBiT: Layer { + public var shortcut: ShortcutBiT + public var convs: [ConvGNV2BiT] + + public init(inFilters: Int, outFilters: Int, stride: Int, expansion: Int){ + if expansion == 1 { + convs = [ + ConvGNV2BiT(inFilters: inFilters, outFilters: outFilters, kernelSize: 3, stride: stride), + ConvGNV2BiT(inFilters: outFilters, outFilters: outFilters, kernelSize: 3, isSecond: true) + ] + } else { + convs = [ + ConvGNV2BiT(inFilters: inFilters, outFilters: outFilters/4), + ConvGNV2BiT(inFilters: outFilters/4, outFilters: outFilters/4, kernelSize: 3, stride: stride, isSecond: true), + ConvGNV2BiT(inFilters: outFilters/4, outFilters: outFilters) + ] + } + shortcut = ShortcutBiT(inFilters: inFilters, outFilters: outFilters, stride: stride) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + let convResult = convs.differentiableReduce(input) { $1($0) } + return convResult + shortcut(input) + } +} + +/// An implementation of the BigTransfer architecture with variable sizes +public struct BigTransfer: Layer { + public var inputStem: StandardizedConv2D + public var maxPool: MaxPool2D + public var residualBlocks: [ResidualBlockBiT] = [] + public var groupNorm : GroupNorm + public var flatten = Flatten() + public var classifier: Dense + public var avgPool = GlobalAvgPool2D() + @noDerivative public var finalOutFilter : Int = 0 + + /// Initialize the BigTransfer Model + /// + /// - Parameters: + /// - classCount: the number of output classes + /// - depth: the specified depht of the network based on the various ResNet architectures + /// - inputChannels: the number of input channels for the dataset + /// - stemFilters: the number of filters in the first three convolutions + public init( + classCount: Int, + depth: Depth, + inputChannels: Int = 3, + modelName: String = "BiT-M-R50x1", + loadWeights: Bool = true + ) { + + self.inputStem = StandardizedConv2D(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .valid, useBias: false) + self.maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2), padding: .valid) + let sizes = [64 / depth.expansion, 64, 128, 256, 512] + for (iBlock, nBlocks) in depth.layerBlockSizes.enumerated() { + let (nIn, nOut) = (sizes[iBlock] * depth.expansion, sizes[iBlock+1] * depth.expansion) + for j in 0..( + offset: Tensor(zeros: [self.finalOutFilter]), + scale: Tensor(zeros: [self.finalOutFilter]), + groupCount: 2, + axis: -1, + epsilon: 0.001) + self.classifier = Dense(inputSize: 512 * depth.expansion, outputSize: classCount) + + if loadWeights { + let weightsArray = getPretrainedWeightsDict(modelName: modelName) + + // Load weights from model .npz file into the BigTransfer model + let convs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && !(key.name.contains("proj"))} + + var k = 0 + for (idx, i) in self.residualBlocks.enumerated() { + for (jdx, _) in i.convs.enumerated() { + assert(self.residualBlocks[idx].convs[jdx].conv.conv.filter.shape == convs[k].layer.shape) + self.residualBlocks[idx].convs[jdx].conv.conv.filter = convs[k].layer + k = k + 1 + } + } + + let 
projectiveConvs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && (key.name.contains("proj"))} + var normScale = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/gamma")} + var normOffset = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/beta")} + + k = 0 + for (idx, i) in self.residualBlocks.enumerated() { + if (i.shortcut.projection.conv.filter.shape != [1, 1, 1, 1]) + { + assert(self.residualBlocks[idx].shortcut.projection.conv.filter.shape == projectiveConvs[k].layer.shape) + self.residualBlocks[idx].shortcut.projection.conv.filter = projectiveConvs[k].layer + + assert(self.residualBlocks[idx].shortcut.norm.scale.shape == normScale[k].layer.shape) + self.residualBlocks[idx].shortcut.norm.scale = normScale[k].layer + + assert(self.residualBlocks[idx].shortcut.norm.offset.shape == normOffset[k].layer.shape) + self.residualBlocks[idx].shortcut.norm.offset = normOffset[k].layer + k = k + 1 + } + } + + normScale = weightsArray.filter {key in return key.name.contains("gamma")} + k = 0 + for (idx, i) in self.residualBlocks.enumerated() { + for (jdx, _) in i.convs.enumerated() { + assert(normScale[k].layer.shape == self.residualBlocks[idx].convs[jdx].norm.scale.shape) + self.residualBlocks[idx].convs[jdx].norm.scale = normScale[k].layer + k = k + 1 + } + } + + normOffset = weightsArray.filter {key in return key.name.contains("beta")} + + var l = 0 + for (idx, i) in self.residualBlocks.enumerated() { + for (jdx, _) in i.convs.enumerated() { + assert(normOffset[l].layer.shape == self.residualBlocks[idx].convs[jdx].norm.offset.shape) + self.residualBlocks[idx].convs[jdx].norm.offset = normOffset[l].layer + l = l + 1 + } + } + + assert(self.groupNorm.scale.shape == normScale[k].layer.shape) + self.groupNorm.scale = normScale[k].layer + assert(self.groupNorm.offset.shape == normOffset[l].layer.shape) + self.groupNorm.offset = normOffset[l].layer + + let rootConvs = weightsArray.filter {key in return key.name.contains("root_block")} + assert(self.inputStem.conv.filter.shape == rootConvs[0].layer.shape) + self.inputStem.conv.filter = rootConvs[0].layer + } + } + + @differentiable(wrt: imageBatch) + public func classify(_ imageBatch: Tensor) -> Tensor { + var grayscale = Tensor(imageBatch.unstacked(alongAxis: 3)[0]) + var rgbImages = Tensor(stacking: [grayscale, grayscale, grayscale], alongAxis: 3) + + var resizeSize = getResolution(originalResolution: (40, 70)) + var resized = resize(images: rgbImages, size: (resizeSize.0, resizeSize.1)) + return Tensor(callAsFunction(resized)) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + var paddedInput = input.padded(forSizes: paddingFromKernelSize(kernelSize: 7)) + paddedInput = inputStem(paddedInput).padded(forSizes: paddingFromKernelSize(kernelSize: 3)) + let inputLayer = maxPool(paddedInput) + let blocksReduced = residualBlocks.differentiableReduce(inputLayer) { $1($0) } + let normalized = relu(groupNorm(blocksReduced)) + return normalized.sequenced(through: avgPool, flatten, classifier) + } + + public func save(new_path: String = "new_weights", path: String = "BiT-M-R50x1") { + var weightsArray = getPretrainedWeightsDict(modelName: path) + + // Load weights from model .npz file into the BigTransfer model + var convs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && !(key.name.contains("proj"))} + var k = 0 + for (idx, i) in 
self.residualBlocks.enumerated() {
+      for (jdx, _) in i.convs.enumerated() {
+        assert(self.residualBlocks[idx].convs[jdx].conv.conv.filter.shape == convs[k].layer.shape)
+        convs[k].layer = self.residualBlocks[idx].convs[jdx].conv.conv.filter
+        k = k + 1
+      }
+    }
+
+    var projectiveConvs = weightsArray.filter { key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && (key.name.contains("proj")) }
+    var normScale = weightsArray.filter { key in return key.name.contains("unit01/a/group_norm/gamma") }
+    var normOffset = weightsArray.filter { key in return key.name.contains("unit01/a/group_norm/beta") }
+
+    k = 0
+    for (idx, i) in self.residualBlocks.enumerated() {
+      if (i.shortcut.projection.conv.filter.shape != [1, 1, 1, 1])
+      {
+        assert(self.residualBlocks[idx].shortcut.projection.conv.filter.shape == projectiveConvs[k].layer.shape)
+        projectiveConvs[k].layer = self.residualBlocks[idx].shortcut.projection.conv.filter
+        assert(self.residualBlocks[idx].shortcut.norm.scale.shape == normScale[k].layer.shape)
+        normScale[k].layer = self.residualBlocks[idx].shortcut.norm.scale
+
+        assert(self.residualBlocks[idx].shortcut.norm.offset.shape == normOffset[k].layer.shape)
+        normOffset[k].layer = self.residualBlocks[idx].shortcut.norm.offset
+        k = k + 1
+      }
+    }
+
+    var gammaNormScale = weightsArray.filter { key in return key.name.contains("gamma") }
+    k = 0
+    for (idx, i) in self.residualBlocks.enumerated() {
+      for (jdx, _) in i.convs.enumerated() {
+        assert(gammaNormScale[k].layer.shape == self.residualBlocks[idx].convs[jdx].norm.scale.shape)
+        gammaNormScale[k].layer = self.residualBlocks[idx].convs[jdx].norm.scale
+        k = k + 1
+      }
+    }
+
+    var betaNormOffset = weightsArray.filter { key in return key.name.contains("beta") }
+
+    var l = 0
+    for (idx, i) in self.residualBlocks.enumerated() {
+      for (jdx, _) in i.convs.enumerated() {
+        assert(betaNormOffset[l].layer.shape == self.residualBlocks[idx].convs[jdx].norm.offset.shape)
+        betaNormOffset[l].layer = self.residualBlocks[idx].convs[jdx].norm.offset
+        l = l + 1
+      }
+    }
+
+    assert(self.groupNorm.scale.shape == gammaNormScale[k].layer.shape)
+    gammaNormScale[k].layer = self.groupNorm.scale
+    assert(self.groupNorm.offset.shape == betaNormOffset[l].layer.shape)
+    betaNormOffset[l].layer = self.groupNorm.offset
+
+    var rootConvs = weightsArray.filter { key in return key.name.contains("root_block") }
+    assert(self.inputStem.conv.filter.shape == rootConvs[0].layer.shape)
+    rootConvs[0].layer = self.inputStem.conv.filter
+
+    let newWeights = convs + projectiveConvs + normScale + normOffset + gammaNormScale + betaNormOffset
+
+    var weightDict: [String: Tensor<Float>] = [rootConvs[0].name: rootConvs[0].layer]
+
+    for weight in newWeights {
+      weightDict[weight.name] = weight.layer
+    }
+
+    let np = Python.import("numpy")
+    let zipfile = Python.import("zipfile")
+    let format = Python.import("numpy.lib.format")
+    let compat = Python.import("numpy.compat")
+    let file = compat.os_fspath(new_path + ".npz")
+    let zipf = zipfile.ZipFile(file, mode: "w", compression: zipfile.ZIP_STORED, allowZip64: true)
+
+    for weight in weightsArray {
+      let fname = weight.name + ".npy"
+      // Prefer the updated weight when this layer was modified above; otherwise
+      // fall back to the original pretrained value.
+      var val = np.asanyarray(weight.layer.makeNumpyArray())
+      if let updated = weightDict[weight.name] {
+        val = updated.makeNumpyArray()
+      }
+      let file_id = zipf.open(fname, "w", force_zip64: true)
+      format.write_array(file_id, val, allow_pickle: true)
+    }
+    // Finalize the archive so that the central directory is written.
+    zipf.close()
+  }
+}
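+// Editor's usage sketch (not part of the original PR; `patchBatch` is a hypothetical
+// [N, 40, 70, 1] grayscale batch). Constructing the model fetches the pretrained BiT
+// weights via wget when loadWeights is true:
+//   var model = BigTransfer(classCount: 2, depth: .resNet50)
+//   let logits = model.classify(patchBatch)     // resizes input per the BiT-HyperRule
+//   model.save(new_path: "bit_finetuned")       // writes bit_finetuned.npz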
+extension BigTransfer {
+  public enum Depth {
+    case resNet18
+    case resNet34
+    case resNet50
+    case resNet101
+    case resNet152
+
+    var expansion: Int {
+      switch self {
+      case .resNet18, .resNet34: return 1
+      default: return 4
+      }
+    }
+
+    var layerBlockSizes: [Int] {
+      switch self {
+      case .resNet18: return [2, 2, 2, 2]
+      case .resNet34: return [3, 4, 6, 3]
+      case .resNet50: return [3, 4, 6, 3]
+      case .resNet101: return [3, 4, 23, 3]
+      case .resNet152: return [3, 8, 36, 3]
+      }
+    }
+  }
+}
+
+extension BigTransfer: Classifier {}
\ No newline at end of file
diff --git a/Sources/BeeTracking/NNClassifier.swift b/Sources/BeeTracking/NNClassifier.swift
new file mode 100644
index 00000000..cf49f0b7
--- /dev/null
+++ b/Sources/BeeTracking/NNClassifier.swift
@@ -0,0 +1,607 @@
+// Copyright 2020 The SwiftFusion Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import SwiftFusion
+import TensorFlow
+import PythonKit
+import BeeDataset
+
+// MARK: - The neural network patch classifier
+/// A small convolutional network that classifies an image patch as foreground (bee)
+/// or background; used as the appearance model of the tracking factor.
+
+public struct BeeBatch {
+  let patch: Tensor<Double>
+  let label: Tensor<Double>
+}
+/// Conform `BeeBatch` to `Collatable` so that we can load it into a `TrainingEpoch`.
+extension BeeBatch: Collatable {
+  public init<BatchSamples: Collection>(collating samples: BatchSamples)
+  where BatchSamples.Element == Self {
+    patch = Tensor(stacking: samples.map { $0.patch })
+    label = Tensor(stacking: samples.map { $0.label })
+  }
+}
+
+/// The encoder layers follow the RAE encoder of [1].
+/// [1] https://openreview.net/forum?id=S1g7tpEYDS
+public struct NNClassifier: Layer {
+  /// The height of the input image in pixels.
+  @noDerivative public let imageHeight: Int
+
+  /// The width of the input image in pixels.
+  @noDerivative public let imageWidth: Int
+
+  /// The number of channels in the input image.
+  @noDerivative public let imageChannels: Int
+
+  /// The number of activations in the hidden layer.
+  @noDerivative public let hiddenDimension: Int
+
+  /// The number of activations in the appearance code.
+  @noDerivative public let latentDimension: Int
+
+  /// First conv to downsample the image
+  public var encoder_conv1: Conv2D<Double>
+
+  /// Max pooling of factor 2
+  var encoder_pool1: MaxPool2D<Double>
+
+  /// First FCN encoding layer goes from image to hidden dimension
+  public var encoder1: Dense<Double>
+
+  /// Second goes from dense features to latent code
+  public var encoder2: Dense<Double>
+
+  /// Third goes from latent code to the two class logits
+  public var encoder3: Dense<Double>
+
+  /// Creates an instance for images with size `[imageHeight, imageWidth, imageChannels]`, with
+  /// hidden and latent dimensions given by `hiddenDimension` and `latentDimension`.
+  public init(
+    imageHeight: Int, imageWidth: Int, imageChannels: Int,
+    hiddenDimension: Int, latentDimension: Int
+  ) {
+    self.imageHeight = imageHeight
+    self.imageWidth = imageWidth
+    self.imageChannels = imageChannels
+    self.hiddenDimension = hiddenDimension
+    self.latentDimension = latentDimension
+
+    encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu)
+
+    encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same)
+
+    encoder1 = Dense(
+      inputSize: imageHeight * imageWidth * imageChannels / 4,
+      outputSize: hiddenDimension,
+      activation: relu)
+
+    encoder2 = Dense(
+      inputSize: hiddenDimension,
+      outputSize: latentDimension,
+      activation: relu)
+
+    encoder3 = Dense(
+      inputSize: latentDimension,
+      outputSize: 2)
+  }
+
+  /// Hyperparameters for training from an image batch.
+  public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String, learningRate: Float)
+
+  /// Initializes by training on a batch of labeled patches, optionally starting
+  /// from pretrained weights when `train_mode == "pretrained"`.
+  public init(patches: Tensor<Double>, labels: Tensor<Double>, given parameters: HyperParameters? = nil, train_mode: String) {
+    print("init from image batch")
+    let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1)
+    let h = parameters!.hiddenDimension
+    let d = parameters!.latentDimension
+    var model = NNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_,
+                             hiddenDimension: h, latentDimension: d)
+    if train_mode == "pretrained" {
+      print("PRETRAINED")
+      let np = Python.import("numpy")
+      print("loading pretrained weights")
+      model.load(weights: np.load(parameters!.weightFile, allow_pickle: true))
+    }
+
+    let optimizer = Adam(for: model)
+    optimizer.learningRate = parameters!.learningRate
+
+    let lossFunc = NNClassifierLoss()
+    // Issues I came across: the TrainingEpochs function was scrambling the order,
+    // and the map function was too slow during training.
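+    // Editor's note: what follows is a standard Swift for TensorFlow training loop.
+    // TrainingEpochs reshuffles the samples each epoch; every batch is collated into
+    // a BeeBatch, and one Adam step is taken per batch on the cross-entropy loss.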
+
+    // Thread-local variable that model layers read to know their mode
+    Context.local.learningPhase = .training
+
+    let trainingData: [BeeBatch] = zip(patches.unstacked(), labels.unstacked()).map { BeeBatch(patch: $0.0, label: $0.1) }
+
+    // An infinite sequence of training epochs; each epoch reshuffles the samples.
+    let epochs = TrainingEpochs(samples: trainingData, batchSize: 200)
+    var trainLossResults: [Double] = []
+    let epochCount = 100
+    for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() {
+      var epochLoss: Double = 0
+      var batchCount: Int = 0
+      for batchSamples in epoch {
+        let batch = batchSamples.collated
+        let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) }
+        optimizer.update(&model, along: grad)
+        epochLoss += loss.scalarized()
+        batchCount += 1
+      }
+      epochLoss /= Double(batchCount)
+      trainLossResults.append(epochLoss)
+      if epochIndex % 5 == 0 {
+        print("\nEpoch \(epochIndex):", terminator: "")
+      }
+      print(" \(epochLoss),", terminator: "")
+    }
+
+    self = model
+  }
+
+  /// Differentiable classifier: maps a batch of patches to a batch of class logits.
+  @differentiable(wrt: imageBatch)
+  public func classify(_ imageBatch: Tensor<Double>) -> Tensor<Double> {
+    let batchSize = imageBatch.shape[0]
+    let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels]
+    precondition(
+      imageBatch.shape == expectedShape,
+      "input shape is \(imageBatch.shape), but expected \(expectedShape)")
+    return imageBatch
+      .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4])
+      .sequenced(through: encoder1, encoder2, encoder3)
+  }
+
+  /// Standard: add syntactic sugar to apply model as a function call.
+  @differentiable
+  public func callAsFunction(_ imageBatch: Tensor<Double>) -> Tensor<Double> {
+    let output = classify(imageBatch)
+    return output
+  }
+}
+
+/// The loss function for the `NNClassifier`.
+public struct NNClassifierLoss {
+  /// Returns the loss of `model` on `imageBatch`.
+  /// Parameter printLoss: Whether to print the loss and its components.
+  @differentiable
+  public func callAsFunction(
+    _ model: NNClassifier, _ imageBatch: BeeBatch, printLoss: Bool = false
+  ) -> Tensor<Double> {
+    let output = model(imageBatch.patch)
+    let totalLoss = softmaxCrossEntropy(logits: output, labels: Tensor<Int32>(imageBatch.label))
+    return totalLoss
+  }
+}
+
+extension NNClassifier: Classifier {}
+
+public struct PretrainedNNClassifier: Classifier {
+  public var inner: NNClassifier
+
+  /// Constructor that does training of the network
+  public init(patches: Tensor<Double>, labels: Tensor<Double>, given: HyperParameters, train_mode: String) {
+    inner = NNClassifier(
+      patches: patches, labels: labels, given: (given != nil) ?
+ (hiddenDimension: given.hiddenDimension, + latentDimension: given.latentDimension, + weightFile: given.weightFile, + learningRate: given.learningRate) : nil, train_mode: train_mode + ) + + + } + + /// Save the weight to file + public func save(to path: String) { + let np = Python.import("numpy") + np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) + } + + @differentiable + public func classify(_ imageBatch: Tensor) -> Tensor { + inner.classify(imageBatch) + } + + /// Initialize given an image batch + public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String, learningRate: Float) +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +// /// [1] https://openreview.net/forum?id=S1g7tpEYDS +// public struct SmallerNNClassifier: Layer{ +// @noDerivative public let imageHeight: Int +// @noDerivative public let imageWidth: Int +// @noDerivative public let imageChannels: Int +// @noDerivative public let latentDimension: Int +// public var encoder_conv1: Conv2D +// var encoder_pool1: MaxPool2D +// public var encoder1: Dense +// public var encoder2: Dense + +// public init( +// imageHeight: Int, imageWidth: Int, imageChannels: Int, latentDimension: Int +// ) { +// self.imageHeight = imageHeight +// self.imageWidth = imageWidth +// self.imageChannels = imageChannels +// self.latentDimension = latentDimension + +// encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) + +// encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) + +// encoder1 = Dense( +// inputSize: imageHeight * imageWidth * imageChannels / 4, +// outputSize: latentDimension, +// activation: relu) + +// encoder2 = Dense( +// inputSize: latentDimension, +// outputSize: 2) + +// } + +// /// Initialize given an image batch +// public init(patches patches: Tensor, labels labels: Tensor, given latentDimension: Int? = nil) { +// print("init from image batch") +// let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) +// let d = latentDimension ?? 
10 +// var model = SmallerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, latentDimension: d) + +// let optimizer = Adam(for: model) +// optimizer.learningRate = 1e-3 + +// let lossFunc = NNClassifierLoss() +// Context.local.learningPhase = .training +// let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) +// let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array +// var trainLossResults: [Double] = [] +// let epochCount = 600 +// for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { +// var epochLoss: Double = 0 +// var batchCount: Int = 0 +// for batchSamples in epoch { +// let batch = batchSamples.collated +// let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } +// optimizer.update(&model, along: grad) +// epochLoss += loss.scalarized() +// batchCount += 1 +// } +// epochLoss /= Double(batchCount) +// trainLossResults.append(epochLoss) +// // if epochIndex % 50 == 0 { +// print("Epoch \(epochIndex): Loss: \(epochLoss)") +// // } +// } + +// self = model +// } + +// /// Differentiable encoder +// @differentiable(wrt: imageBatch) +// public func classify(_ imageBatch: Tensor) -> Tensor { +// let batchSize = imageBatch.shape[0] +// let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] +// precondition( +// imageBatch.shape == expectedShape, +// "input shape is \(imageBatch.shape), but expected \(expectedShape)") +// return imageBatch +// .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) +// .sequenced(through: encoder1, encoder2) +// } + +// /// Standard: add syntactic sugar to apply model as a function call. 
+// @differentiable +// public func callAsFunction(_ imageBatch: Tensor) -> Tensor { +// let output = classify(imageBatch) +// return output +// } +// } + +// public struct LargerNNClassifier: Layer{ +// @noDerivative public let imageHeight: Int +// @noDerivative public let imageWidth: Int +// @noDerivative public let imageChannels: Int +// @noDerivative public let hiddenDimension: Int +// @noDerivative public let latentDimension: Int +// public var encoder_conv1: Conv2D +// var encoder_pool1: MaxPool2D +// public var encoder1: Dense +// public var encoder2: Dense +// public var encoder3: Dense +// public var encoder4: Dense +// public init( +// imageHeight: Int, imageWidth: Int, imageChannels: Int, +// hiddenDimension: Int, latentDimension: Int +// ) { +// self.imageHeight = imageHeight +// self.imageWidth = imageWidth +// self.imageChannels = imageChannels +// self.hiddenDimension = hiddenDimension +// self.latentDimension = latentDimension + +// encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) + +// encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) + +// encoder1 = Dense( +// inputSize: imageHeight * imageWidth * imageChannels / 4, +// outputSize: hiddenDimension, +// activation: relu) + +// encoder2 = Dense( +// inputSize: hiddenDimension, +// outputSize: hiddenDimension, +// activation: relu) + +// encoder3 = Dense( +// inputSize: hiddenDimension, +// outputSize: latentDimension, +// activation: relu) + +// encoder4 = Dense( +// inputSize: latentDimension, +// outputSize: 2) + +// } + +// /// Initialize given an image batch +// public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int) +// // public init(from imageBatch: Tensor, given parameters: HyperParameters? = nil) { +// public init(patches patches: Tensor, labels labels: Tensor, given parameters: HyperParameters? = nil) { +// print("init from image batch") +// let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) +// let (h,d) = parameters ?? 
(100,10) +// var model = LargerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, +// hiddenDimension: h, latentDimension: d) +// let optimizer = Adam(for: model) +// optimizer.learningRate = 1e-3 +// let lossFunc = NNClassifierLoss() +// Context.local.learningPhase = .training +// let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) +// let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array +// // +// var trainLossResults: [Double] = [] +// let epochCount = 600 +// for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { +// var epochLoss: Double = 0 +// var batchCount: Int = 0 +// for batchSamples in epoch { +// let batch = batchSamples.collated +// let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } +// optimizer.update(&model, along: grad) +// epochLoss += loss.scalarized() +// batchCount += 1 +// } +// epochLoss /= Double(batchCount) +// trainLossResults.append(epochLoss) +// if epochIndex % 5 == 0 { +// print("\nEpoch \(epochIndex):", terminator:"") +// } +// print(" \(epochLoss),", terminator: "") +// } + +// // if NSFileManager.fileExistsAtPath(path) { +// // print("File exists") +// // } else { +// // print("File does not exist") +// // } +// // np.save("epochloss\()", Tensor(trainLossResults).makeNumpyArray()) + +// self = model +// } + +// /// Differentiable encoder +// @differentiable(wrt: imageBatch) +// public func classify(_ imageBatch: Tensor) -> Tensor { +// let batchSize = imageBatch.shape[0] +// let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] +// precondition( +// imageBatch.shape == expectedShape, +// "input shape is \(imageBatch.shape), but expected \(expectedShape)") +// return imageBatch +// .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) +// .sequenced(through: encoder1, encoder2, encoder3, encoder4) +// } + +// /// Standard: add syntactic sugar to apply model as a function call. +// @differentiable +// public func callAsFunction(_ imageBatch: Tensor) -> Tensor { +// let output = classify(imageBatch) +// return output +// } +// } + + + + + + +// public struct PretrainedSmallerNNClassifier : Classifier{ +// public var inner: SmallerNNClassifier + +// /// The constructor that only does loading of the pretrained weights. +// public init(from imageBatch: Tensor, given: HyperParameters?) { +// let shape = imageBatch.shape +// precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") +// let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) +// if let params = given { +// var encoder = SmallerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: params.latentDimension +// ) + +// let np = Python.import("numpy") + +// encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) +// inner = encoder +// } else { +// inner = SmallerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: 1 +// ) +// fatalError("Must provide hyperparameters to pretrained network") +// } +// } + +// /// Constructor that does training of the network +// public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { +// inner = SmallerNNClassifier( +// patches: patches, labels: labels, given: (given != nil) ? 
(given!.latentDimension) : nil +// ) +// } + +// /// Save the weight to file +// public func save(to path: String) { +// let np = Python.import("numpy") +// np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) +// } + +// @differentiable +// public func classify(_ imageBatch: Tensor) -> Tensor { +// inner.classify(imageBatch) +// } + + +// /// Initialize given an image batch +// public typealias HyperParameters = (latentDimension: Int, weightFile: String) +// } + + + + + + +// public struct PretrainedLargerNNClassifier : Classifier{ +// public var inner: LargerNNClassifier + +// /// The constructor that only does loading of the pretrained weights. +// public init(from imageBatch: Tensor, given: HyperParameters?) { +// let shape = imageBatch.shape +// precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") +// let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) +// if let params = given { +// var encoder = LargerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, +// hiddenDimension: params.hiddenDimension, latentDimension: params.latentDimension +// ) + +// let np = Python.import("numpy") + +// encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) +// inner = encoder +// } else { +// inner = LargerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, +// hiddenDimension: 1, latentDimension: 1 +// ) +// fatalError("Must provide hyperparameters to pretrained network") +// } +// } + +// /// Constructor that does training of the network +// public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { +// inner = LargerNNClassifier( +// patches: patches, labels: labels, given: (given != nil) ? (hiddenDimension: given!.hiddenDimension, latentDimension: given!.latentDimension) : nil +// ) +// } + +// /// Save the weight to file +// public func save(to path: String) { +// let np = Python.import("numpy") +// np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) +// } + +// @differentiable +// public func classify(_ imageBatch: Tensor) -> Tensor { +// inner.classify(imageBatch) +// } + + +// /// Initialize given an image batch +// public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String) +// } \ No newline at end of file diff --git a/Sources/BeeTracking/OISTBeeVideo+Batches.swift b/Sources/BeeTracking/OISTBeeVideo+Batches.swift index 399eced7..5b7ac722 100644 --- a/Sources/BeeTracking/OISTBeeVideo+Batches.swift +++ b/Sources/BeeTracking/OISTBeeVideo+Batches.swift @@ -86,7 +86,7 @@ extension OISTBeeVideo { /// Anything not completely overlapping labels var deterministicEntropy = ARC4RandomNumberGenerator(seed: 42) let frames = self.randomFrames(self.frames.count, using: &deterministicEntropy) - + // We need `batchSize / frames.count` patches from each frame, plus the remainder of the // integer division. 
   var patchesPerFrame = Array(repeating: batchSize / frames.count, count: frames.count)
@@ -103,7 +103,6 @@ extension OISTBeeVideo {
           rows: patchSize.0, cols: patchSize.1))
     }
   }
-
   return obbs
 }
diff --git a/Sources/BeeTracking/ProbabilisticTracker.swift b/Sources/BeeTracking/ProbabilisticTracker.swift
index 24231d77..f1ae8938 100644
--- a/Sources/BeeTracking/ProbabilisticTracker.swift
+++ b/Sources/BeeTracking/ProbabilisticTracker.swift
@@ -149,12 +149,12 @@ public func trainProbabilisticTracker(
     bgRandomFrameCount: bgRandomFrameCount,
     useCache: true
   )
-
   let batchPositive = encoder.encode(fg)
   let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3)
+
   let batchNegative = encoder.encode(bg)
-  let backgroundModel = GaussianNB(from: batchNegative, regularizer: 1e-3)
+  let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3)

   let tracker = makeProbabilisticTracker(
     model: encoder,
@@ -207,7 +207,7 @@ public func makeProbabilisticTracker<
       appearanceModelSize: targetSize,
       foregroundModel: foregroundModel,
       backgroundModel: backgroundModel,
-      maxPossibleNegativity: 1e4
+      maxPossibleNegativity: 1e7
     )
   )
 }
diff --git a/Sources/BeeTracking/ProbabilisticTracker2.swift b/Sources/BeeTracking/ProbabilisticTracker2.swift
new file mode 100644
index 00000000..dc2921e3
--- /dev/null
+++ b/Sources/BeeTracking/ProbabilisticTracker2.swift
@@ -0,0 +1,72 @@
+import BeeDataset
+import PenguinStructures
+import SwiftFusion
+import TensorFlow
+import PythonKit
+import Foundation
+
+/// Returns a tracking configuration for a tracker using a classifier.
+///
+/// - Parameter model: The classifier model to use.
+/// - Parameter frames: The frames of the video where we want to run tracking.
+/// - Parameter targetSize: The size of the target in the frames.
+public func makeProbabilisticTracker2<
+  MyClassifier: Classifier
+>(
+  model: MyClassifier,
+  frames: [Tensor<Float>],
+  targetSize: (Int, Int)
+) -> TrackingConfiguration<Tuple1<Pose2>> {
+  var variableTemplate = VariableAssignments()
+  var frameVariableIDs = [Tuple1<TypedID<Pose2>>]()
+  for _ in 0..<frames.count {
+    frameVariableIDs.append(Tuple1(variableTemplate.store(Pose2())))
+  }
+
+  let addPrior = { (variables: Tuple1<TypedID<Pose2>>, values: Tuple1<Pose2>, graph: inout FactorGraph) -> () in
+    let (poseID) = unpack(variables)
+    let (pose) = unpack(values)
+    graph.store(WeightedPriorFactorPose2(poseID, pose, weight: 1e-2, rotWeight: 2e2))
+  }
+
+  let addTrackingFactor = { (variables: Tuple1<TypedID<Pose2>>, frame: Tensor<Float>, graph: inout FactorGraph) -> () in
+    let (poseID) = unpack(variables)
+    graph.store(
+      ProbablisticTrackingFactor2(poseID,
+        measurement: frame,
+        classifier: model,
+        patchSize: targetSize,
+        appearanceModelSize: targetSize
+      )
+    )
+  }
+
+  return TrackingConfiguration(
+    frames: frames,
+    variableTemplate: variableTemplate,
+    frameVariableIDs: frameVariableIDs,
+    addPriorFactor: addPrior,
+    addTrackingFactor: addTrackingFactor,
+    addBetweenFactor: { (variables1, variables2, graph) -> () in
+      let (poseID1) = unpack(variables1)
+      let (poseID2) = unpack(variables2)
+      graph.store(WeightedBetweenFactorPose2(poseID1, poseID2, Pose2(), weight: 1e-2, rotWeight: 2e2))
+    },
+    addFixedBetweenFactor: { (values, variables, graph) -> () in
+      let (prior) = unpack(values)
+      let (poseID) = unpack(variables)
+      graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 8, sdTheta:0.4))
+    })
+}
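For orientation, a hypothetical driver sketch (not part of this diff): `classifier`, `frames`, and `startPose` are stand-in names, and `infer(knownStart:)` / `frameVariableIDs` are assumed to work the same way as with the existing `makeProbabilisticTracker`.

var tracker = makeProbabilisticTracker2(
  model: classifier,      // any type conforming to the new `Classifier` protocol
  frames: frames,         // [Tensor<Float>] video frames
  targetSize: (40, 70))   // bee patch size used throughout BeeTracking
let prediction = tracker.infer(knownStart: Tuple1(startPose))
// Turn the per-frame pose estimates into oriented bounding boxes.
let boxes = tracker.frameVariableIDs.map { ids in
  OrientedBoundingBox(center: prediction[ids.head], rows: 40, cols: 70)
}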
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+  return (t.head)
+}
\ No newline at end of file
diff --git a/Sources/BeeTracking/TrackingFactorGraph.swift b/Sources/BeeTracking/TrackingFactorGraph.swift
index 5cfeb9e1..89dd760e 100644
--- a/Sources/BeeTracking/TrackingFactorGraph.swift
+++ b/Sources/BeeTracking/TrackingFactorGraph.swift
@@ -134,7 +134,8 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
   ) -> ()

   /// The optimizer to use during inference.
-  public var optimizer = LM()
+  // public var optimizer = LM()
+  public var optimizer = GradientDescent(learningRate: 1e-3)

   /// Creates an instance.
   ///
@@ -172,6 +173,7 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
     self.addBetweenFactor = addBetweenFactor
     self.addFixedBetweenFactor = addFixedBetweenFactor!

+    // For LM
     self.optimizer.precision = 1e-1
     self.optimizer.max_iteration = 100
     self.optimizer.cgls_precision = 1e-5
@@ -194,16 +196,16 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
     // First get pose IDs: pose is assumed to be first variable in the frameVariableID tuple
     let currentPoseID = (frameVariableIDs[i + 1] as! Tuple1<TypedID<Pose2>>).head
     let previousPoseID = (frameVariableIDs[i] as! Tuple1<TypedID<Pose2>>).head
-
     // Remember best pose
     var bestPose = x[currentPoseID]
-
     // Sample from motion model and take best pose
     var bestError = g.error(at: x)
-    for _ in 0..<2000 {
+
+    for _ in 0..<256 {
       x[currentPoseID] = x[previousPoseID]
-      x[currentPoseID].perturbWith(stddev: Vector3(0.3, 8, 4.6))
+      x[currentPoseID].perturbWith(stddev: Vector3(0.2, 8, 8))
       let candidateError = g.error(at: x)
+
       if candidateError < bestError {
         bestError = candidateError
         bestPose = x[currentPoseID]
@@ -212,20 +214,19 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
     x[currentPoseID] = bestPose
   }
+
   /// Extend the track
   mutating func extendTrack(x: inout VariableAssignments, fromFrame i: Int,
     withSampling samplingFlag: Bool = false
   ) {
     let currentVarID = frameVariableIDs[i + 1]
     let previousVarID = frameVariableIDs[i]
-
     // Create a tracking factor graph on just the `i+1`-th variable.
     var g = graph(on: (i + 1)..<(i + 2))

     // The `i`-th variable is already initialized well, so add a prior factor that it stays
     // near its current position.
     addFixedBetweenFactor(x[previousVarID], currentVarID, &g)
-
     // Initialize
     if (samplingFlag) {
       // Try to initialize pose of the `i+1`-th variable by sampling
@@ -256,7 +257,6 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
     }

     // TODO: We could also do a final optimization on all the variables jointly here.
-
     return x
   }
 }
diff --git a/Sources/BeeTracking/TrackingMetrics.swift b/Sources/BeeTracking/TrackingMetrics.swift
index 3413a47e..2cdfb529 100644
--- a/Sources/BeeTracking/TrackingMetrics.swift
+++ b/Sources/BeeTracking/TrackingMetrics.swift
@@ -42,7 +42,7 @@ public struct SubsequenceMetrics: Codable {
     // Find the first failure frame.
     var NFsa = prediction.count
     for (index, overlap) in overlaps.enumerated() {
-      if overlap < 0.1 {
+      if overlap < 0.05 {
         NFsa = index
         break
       }
@@ -194,6 +194,7 @@ extension TrackerEvaluationDataset {
       print("Evaluating sequence \(i + 1) of \(sequenceCount)")
       return sequence.evaluate(tracker, deltaAnchor: deltaAnchor, outputFile: "\(outputFile)-sequence\(i)")
     }
+
     let result = TrackerEvaluationResults(
       sequences: sequenceEvaluations,
       trackerMetrics: TrackerMetrics(sequenceEvaluations.map { $0.sequenceMetrics }),
@@ -252,10 +253,13 @@ extension TrackerEvaluationSequence {
         let subsequence = subsequences[i]
         print("Evaluating subsequence \(i + 1) of \(subsequences.count)")
         (buf.baseAddress! + i).initialize(to: tracker(subsequence.frames, subsequence.groundTruth[0]))
+        }
       }
+      actualCount = subsequences.count
     }
+
     let subsequenceEvaluations = zip(subsequences, subsequencePredictions).map {
       SubsequenceEvaluationResults(
         metrics: SubsequenceMetrics(groundTruth: $0.0.groundTruth, prediction: $0.1),
@@ -263,7 +267,6 @@ extension TrackerEvaluationSequence {
         groundTruth: $0.0.groundTruth,
         frames: $0.0.frames)
     }
-
     let result = SequenceEvaluationResults(
       subsequences: subsequenceEvaluations,
       sequenceMetrics: SequenceMetrics(subsequenceEvaluations.map { $0.metrics }))
@@ -281,7 +284,7 @@ extension TrackerEvaluationDataset {
     for track in video.tracks {
       let sequence = TrackerEvaluationSequence(
         frames: Array(
-          video.frames[track.startFrameIndex..<(track.startFrameIndex + track.boxes.count)]),
+          video.frames[track.startFrameIndex..<(track.boxes.count)]),
         groundTruth: track.boxes)
       sequences.append(sequence)
     }
diff --git a/Sources/BeeTracking/Visualizations.swift b/Sources/BeeTracking/Visualizations.swift
index 7b1fc979..f032db6b 100644
--- a/Sources/BeeTracking/Visualizations.swift
+++ b/Sources/BeeTracking/Visualizations.swift
@@ -60,6 +60,7 @@ public func plotOverlap(metrics: SubsequenceMetrics, on ax: PythonObject) {
   ax.set_title("Overlap")
 }

+
 /// plot Comparison image
 public func plotPatchWithGT(frame: Tensor<Double>, actual: Pose2, expected: Pose2) -> (PythonObject, PythonObject) {
   let plt = Python.import("matplotlib.pyplot")
@@ -77,6 +78,183 @@ public func plotPatchWithGT(frame: Tensor<Double>, actual: Pose2, expected: Pose2
   return (fig, ax)
 }

+public func plotPoseDifference(track: [Pose2], withGroundTruth expected: [Pose2], on ax: PythonObject) {
+  let thetaDiff = zip(track, expected).map{pow(($0.0.rot.theta - $0.1.rot.theta), 2.0)}
+  let posDiff = zip(track, expected).map{pow(($0.0.t.x - $0.1.t.x), 2.0) + pow(($0.0.t.y - $0.1.t.y), 2.0)}
+  ax.plot(thetaDiff, posDiff)
+  ax.set_title("Squared Theta Difference (X-axis) vs. Squared Position Difference (Y-axis)")
+}
+
+public func plotFrameWithPatches(frame: Tensor<Float>, actual: Pose2, expected: Pose2, firstGroundTruth: Pose2) -> (PythonObject, PythonObject) {
+  let plt = Python.import("matplotlib.pyplot")
+  let mpl = Python.import("matplotlib")
+
+  let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2
+  ax.imshow(frame.makeNumpyArray() / 255.0, cmap: "gray")
+  let actualBoundingBox = OrientedBoundingBox(center: actual, rows: 40, cols: 70)
+  ax.plot(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x], actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y], "r-")
+  var supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([actualBoundingBox.center.t.x, actualBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"r",
+    orientation: actualBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+
+  let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70)
+  ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "b-")
+
+  supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([expectedBoundingBox.center.t.x, expectedBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"b",
+    orientation: expectedBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+  ax.set_xlim(expected.t.x - 100, expected.t.x + 100)
+  ax.set_ylim(expected.t.y - 100, expected.t.y + 100)
+
+  ax.title.set_text("Prediction (Red) vs. Ground Truth (Blue)")
+  return (fig, ax)
+}
+
+
+/// plot Comparison image
+public func plotFrameWithPatches2(frame: Tensor<Float>, actual_box1: OrientedBoundingBox, actual_box2: OrientedBoundingBox, expected: Pose2, firstGroundTruth: Pose2) -> (PythonObject, PythonObject) {
+  let plt = Python.import("matplotlib.pyplot")
+  let mpl = Python.import("matplotlib")
+  let (fig, ax) = plt.subplots(1, 2, figsize: Python.tuple([8, 4])).tuple2
+  let np = Python.import("numpy")
+  let fr = np.squeeze(frame.makeNumpyArray())
+  ax[0].imshow(fr / 255.0, cmap: "gray")
+  ax[1].imshow(fr / 255.0, cmap: "gray")
+  ax[0].set_axis_off()
+  ax[1].set_axis_off()
+  let actualBoundingBox = OrientedBoundingBox(center: actual_box1.center, rows: actual_box1.rows, cols: actual_box1.cols)
+  ax[0].plot(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x], actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y], "r-")
+  let supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([actualBoundingBox.center.t.x, actualBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"r",
+    orientation: actualBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax[0].add_patch(supportPatch)
+  ax[0].set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200)
+  ax[0].set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200)
+  ax[0].title.set_text("RAE 256")
+
+  let actualBoundingBox2 = OrientedBoundingBox(center: actual_box2.center, rows: actual_box2.rows, cols: actual_box2.cols)
+  ax[1].plot(actualBoundingBox2.corners.map{$0.x} + [actualBoundingBox2.corners.first!.x], actualBoundingBox2.corners.map{$0.y} + [actualBoundingBox2.corners.first!.y], "r-")
+
+  ax[1].set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200)
+  ax[1].set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200)
+  ax[1].title.set_text("SiamMask")
+
+  return (fig, ax)
+}
+
+
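A minimal usage sketch for the side-by-side comparison plot above (not part of the diff; `frame`, `raeBox`, `siamBox`, and `gt` are hypothetical stand-ins for a video frame, the two trackers' box estimates, and the first ground-truth pose):

let (fig, _) = plotFrameWithPatches2(
  frame: frame, actual_box1: raeBox, actual_box2: siamBox,
  expected: gt, firstGroundTruth: gt)  // gt: Pose2 used to center both 400x400 crops
fig.savefig("rae_vs_siammask.png")     // PythonKit forwards the call to matplotlib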
+public func plotXYandTheta(xs: [Double], ys: [Double], thetas: [Double]) -> (PythonObject, PythonObject) {
+  let plt = Python.import("matplotlib.pyplot")
+  let np = Python.import("numpy")
+
+  let (fig, axs) = plt.subplots(1, 2, figsize: Python.tuple([8, 4])).tuple2
+
+  let ax2 = axs[0]
+  ax2.plot(np.arange(0, xs.count), xs)
+  ax2.plot(np.arange(0, xs.count), ys)
+  ax2.title.set_text("X and Y")
+
+  let ax3 = axs[1]
+  ax3.plot(np.arange(0, xs.count), thetas)
+  ax3.title.set_text("Theta")
+
+  return (fig, axs)
+}
+
+
+/// Plot optimization start and end poses against the label.
+public func plotFrameWithPatches3(frame: Tensor<Float>, start: Pose2, end: Pose2, expected: Pose2, firstGroundTruth: Pose2, errors: [Double], xs: [Double], ys: [Double], thetas: [Double]) -> (PythonObject, PythonObject) {
+  let plt = Python.import("matplotlib.pyplot")
+  let mpl = Python.import("matplotlib")
+  let (fig, axs) = plt.subplots(2, 3, figsize: Python.tuple([18, 10])).tuple2
+  let ax = axs[0][0]
+  let np = Python.import("numpy")
+  let fr = np.squeeze(frame.makeNumpyArray())
+  ax.imshow(fr / 255.0, cmap: "gray")
+  let startBoundingBox = OrientedBoundingBox(center: start, rows: 40, cols: 70)
+  ax.plot(startBoundingBox.corners.map{$0.x} + [startBoundingBox.corners.first!.x], startBoundingBox.corners.map{$0.y} + [startBoundingBox.corners.first!.y], "g-")
+
+  let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70)
+  ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "b-")
+  var supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([expectedBoundingBox.center.t.x, expectedBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"b",
+    orientation: expectedBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+  supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([startBoundingBox.center.t.x, startBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"g",
+    orientation: startBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+
+  let endBoundingBox = OrientedBoundingBox(center: end, rows: 40, cols: 70)
+  ax.plot(endBoundingBox.corners.map{$0.x} + [endBoundingBox.corners.first!.x], endBoundingBox.corners.map{$0.y} + [endBoundingBox.corners.first!.y], "r-")
+  supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([endBoundingBox.center.t.x, endBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"r",
+    orientation: endBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+
+  ax.set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200)
+  ax.set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200)
+  ax.title.set_text("Start (Green), End (Red), vs. Label (Blue)")
+
+  let ax1 = axs[0][1]
+  ax1.plot(np.arange(0, errors.count), errors)
+  ax1.title.set_text("Error value")
+
+  let ax2 = axs[0][2]
+  ax2.plot(np.arange(0, xs.count), xs)
+  ax2.title.set_text("X")
+
+  let ax4 = axs[1][1]
+  ax4.plot(np.arange(0, xs.count), ys)
+  ax4.title.set_text("Y")
+
+  let ax5 = axs[1][2]
+  ax5.plot(np.arange(0, xs.count), thetas)
+  ax5.title.set_text("Theta")
+
+  return (fig, ax)
+}
+
+
 /// Calculate the translation error plane (X-Y)
 public func errorPlaneTranslation<
   Encoder: AppearanceModelEncoder,
diff --git a/Sources/SwiftFusion/Inference/FactorBoilerplate.swift b/Sources/SwiftFusion/Inference/FactorBoilerplate.swift
index 51db2e74..f3d563f3 100644
--- a/Sources/SwiftFusion/Inference/FactorBoilerplate.swift
+++ b/Sources/SwiftFusion/Inference/FactorBoilerplate.swift
@@ -133,6 +133,29 @@ extension LinearizableFactor1 {
 }

+/// A factor, with 1 variable, in a factor graph. Uses a Vector1 error.
+public protocol LinearizableFactor1a: LinearizableFactor, LinearizableFactor1_
+  where Variables == Tuple1<V0>, LinearizableComponent == Self {}
+
+extension LinearizableFactor1a {
+  /// The variable vertex for this factor's 0-th variable.
+  public var input0ID: TypedID<V0> { return edges.head }
+
+  // Implements the error as the scalar value of the 1D vector.
+  public func error(at x: Variables) -> Double {
+    return (errorVector(at: x) as! Vector1).x
+  }
+
+  // Forwarding implementation.
+  @differentiable
+  public func errorVector(at x: Variables) -> ErrorVector {
+    return errorVector(x.head)
+  }
+}
+
+
 // Artifact of Swift weakness.
 /// Do not use this. Use `Factor2` instead.
diff --git a/Sources/SwiftFusion/Inference/FactorsStorage.swift b/Sources/SwiftFusion/Inference/FactorsStorage.swift
index 174b72aa..fdd65b4f 100644
--- a/Sources/SwiftFusion/Inference/FactorsStorage.swift
+++ b/Sources/SwiftFusion/Inference/FactorsStorage.swift
@@ -62,7 +62,16 @@ extension ArrayStorage where Element: VectorFactor {
       let (lFactor, lVars) = factor.linearizableComponent(at: vars)
       let gradIndices = LVariables.linearized(lFactor.edges)
       let grads = GradVariables(at: gradIndices, in: GradVariables.withoutMutation(gradBufs))
-      let newGrads = grads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm }
+
+      var newGrads = grads
+      if lFactor.errorVector(at: lVars) is Vector3 {
+        newGrads = newGrads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm }
+      }
+      else {
+        var currGrads = gradient(at: lVars) { (lFactor.errorVector(at: $0) as! Vector1 + Vector1(1000.0)).squaredNorm } as! PenguinStructures.Tuple<Vector1, Empty>
+        currGrads.head.x = currGrads.head.x / 100.0
+        newGrads = newGrads + (currGrads as! Element.LinearizableComponent.Variables.TangentVector)
+      }
       newGrads.assign(into: gradIndices, in: gradBufs)
   }
 }
diff --git a/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift b/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift
index f98b799b..fdf8a89a 100644
--- a/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift
+++ b/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift
@@ -2,14 +2,24 @@ import PenguinParallel
 import PenguinStructures
 import TensorFlow

-public protocol AppearanceModelEncoder {
-  associatedtype HyperParameters
-  init(from imageBatch: Tensor<Double>, given: HyperParameters?)
+// Same as Encoder. To be used only for an end-to-end classifier
+public protocol Classifier {
+  @differentiable
+  func classify(_ imageBatch: Tensor<Double>) -> Tensor<Double>
+}
+
+// To be used as an encoder.
+public protocol Encoder {
   @differentiable
   func encode(_ imageBatch: Tensor<Double>) -> Tensor<Double>
 }
+
+public protocol AppearanceModelEncoder: Encoder {
+  associatedtype HyperParameters
+  init(from imageBatch: Tensor<Double>, given: HyperParameters?)
+}
+
 public extension AppearanceModelEncoder {
   /// Extension allows to have a default nil parameter
   init(from imageBatch: Tensor<Double>) {
diff --git a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift
index 585948e4..b5f4ae49 100644
--- a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift
+++ b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift
@@ -88,3 +88,48 @@ public struct ProbablisticTrackingFactor<
     return Vector1(result)
   }
 }
+
+
+public struct ProbablisticTrackingFactor2<
+  MyClassifier: Classifier
+>: LinearizableFactor1a {
+  public typealias V0 = Pose2
+
+  public let edges: Variables.Indices
+
+  public let measurement: ArrayImage
+
+  public let classifier: MyClassifier
+
+  public var patchSize: (Int, Int)
+
+  public var appearanceModelSize: (Int, Int)
+
+  public init(
+    _ poseId: TypedID<Pose2>,
+    measurement: Tensor<Float>,
+    classifier: MyClassifier,
+    patchSize: (Int, Int),
+    appearanceModelSize: (Int, Int)
+  ) {
+    self.edges = Tuple1(poseId)
+    self.measurement = ArrayImage(measurement)
+    self.classifier = classifier
+    self.patchSize = patchSize
+    self.appearanceModelSize = appearanceModelSize
+  }
+
+  @differentiable
+  public func errorVector(_ pose: Pose2) -> Vector1 {
+    let region = OrientedBoundingBox(center: pose, rows: patchSize.0, cols: patchSize.1)
+    let patch = Tensor<Double>(measurement.patch(at: region, outputSize: appearanceModelSize).tensor)
+    let output = classifier.classify(patch.expandingShape(at: 0)).squeezingShape(at: 0)
+
+    let sm = softmax(output)
+    let loglikelihood = -log(sm[1]) + log(sm[0])
+
+    let result = loglikelihood.scalarized()
+    return Vector1(result)
+  }
+}
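A quick numeric check (illustration only, not part of the diff) of the error that ProbablisticTrackingFactor2 minimizes: for two-class logits where index 1 is the target, -log(sm[1]) + log(sm[0]) reduces to the logit difference, so a pose the classifier scores as target yields a negative error.

import TensorFlow

let logits = Tensor<Double>([-1.0, 2.0])  // [background, target] scores at a candidate pose
let sm = softmax(logits)
let error = -log(sm[1]) + log(sm[0])      // same expression as in errorVector
print(error.scalarized())                 // -3.0, i.e. logits[0] - logits[1]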
diff --git a/Sources/SwiftFusion/Optimizers/GradientDescent.swift b/Sources/SwiftFusion/Optimizers/GradientDescent.swift
index 17f3353b..f46582a2 100644
--- a/Sources/SwiftFusion/Optimizers/GradientDescent.swift
+++ b/Sources/SwiftFusion/Optimizers/GradientDescent.swift
@@ -17,15 +17,72 @@ import _Differentiation
 public struct GradientDescent {
   /// The fraction of the gradient to move per step.
   public var learningRate: Double
+  public var baseLearningRate: Double

   /// Creates an instance with the given `learningRate`.
   public init(learningRate: Double) {
     self.learningRate = learningRate
+    self.baseLearningRate = learningRate
   }
-
+  /// Get the learning rate schedule based on the dataset size
+  ///
+  /// - Parameters:
+  ///   - datasetSize: number of images in the current dataset
+  /// - Returns: learning rate schedule based on the current dataset
+  func getSchedule(datasetSize: Int) -> Array<Int> {
+    if datasetSize == 100 {
+      return [3, 6, 10, 100]
+    }
+    if datasetSize < 20000 {
+      return [100, 200, 300, 400, 500]
+    }
+    else if datasetSize < 500000 {
+      return [500, 3000, 6000, 9000, 10000]
+    }
+    else {
+      return [500, 6000, 12000, 18000, 20000]
+    }
+  }
+  /// Get learning rate at the current step given the dataset size and base learning rate
+  ///
+  /// - Parameters:
+  ///   - step: current training step
+  ///   - datasetSize: number of images in the dataset
+  ///   - baseLearningRate: starting learning rate to modify
+  /// - Returns: learning rate at the current step in training
+  func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? {
+    let supports = getSchedule(datasetSize: datasetSize)
+    // Linear warmup
+    if step < supports[0] {
+      return baseLearningRate * Float(step) / Float(supports[0])
+    }
+    // End of training
+    else if step >= supports.last! {
+      return nil
+    }
+    // Staircase decays by factor of 10
+    else {
+      var baseLearningRate = baseLearningRate
+      for s in supports[1...] {
+        if s < step {
+          baseLearningRate = baseLearningRate / 10.0
+        }
+      }
+      return baseLearningRate
+    }
+  }
   /// Moves `values` along the gradient of `objective`'s error function for a single gradient
   /// descent step.
   public func update(_ values: inout VariableAssignments, objective: FactorGraph) {
     values.move(along: -learningRate * objective.errorGradient(at: values))
   }
 }
+
+extension GradientDescent: Optimizer {
+  public mutating func optimize(graph: FactorGraph, initial: inout VariableAssignments) {
+    for i in 0..<15 {
+      self.learningRate = Double(getLearningRate(step: i + 1, datasetSize: 100, baseLearningRate: Float(self.baseLearningRate))!)
+      self.update(&initial, objective: graph)
+    }
+  }
+}
\ No newline at end of file
diff --git a/Sources/SwiftFusion/Optimizers/LM.swift b/Sources/SwiftFusion/Optimizers/LM.swift
index 76a66193..7beed981 100644
--- a/Sources/SwiftFusion/Optimizers/LM.swift
+++ b/Sources/SwiftFusion/Optimizers/LM.swift
@@ -82,8 +82,8 @@ public struct LM {
     var inner_iter_step = 0
     var inner_success = false
     var all_done = false
-    for _ in 0..<max_iteration {
diff --git a/Sources/SwiftFusion/Optimizers/Optimizer.swift b/Sources/SwiftFusion/Optimizers/Optimizer.swift
new file mode 100644
--- /dev/null
+++ b/Sources/SwiftFusion/Optimizers/Optimizer.swift
+public protocol Optimizer {
+  mutating func optimize(graph: FactorGraph, initial: inout VariableAssignments) -> ()
+
+}
\ No newline at end of file
diff --git a/Sources/SwiftFusion/Probability/MultivariateGaussian.swift b/Sources/SwiftFusion/Probability/MultivariateGaussian.swift
index 7e96aa4f..33885a73 100644
--- a/Sources/SwiftFusion/Probability/MultivariateGaussian.swift
+++ b/Sources/SwiftFusion/Probability/MultivariateGaussian.swift
@@ -78,7 +78,7 @@ public struct MultivariateGaussian: GenerativeDensity {
     return t.scalarized() / 2.0
   }
-
+  /// Calculates the normalized probability
   @differentiable
   public func probability(_ sample: T) -> Double {
     // - ToDo: Precalculate constant
diff --git a/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift b/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift
index 2ec763ed..de38d895 100644
--- a/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift
+++ b/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift
@@ -17,7 +17,7 @@ import SwiftFusion
 import XCTest

 final class GradientDescentTests: XCTestCase {
-  /// Test convergence for a simple Pose2SLAM graph.
+  // Test convergence for a simple Pose2SLAM graph.
   func testPose2SLAM() {
     var x = VariableAssignments()
     let pose1ID = x.store(Pose2(Rot2(0.2), Vector2(0.5, 0.0)))
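To make the new schedule concrete: on the `datasetSize: 100` path that `optimize` hard-codes, the supports are [3, 6, 10, 100], so the 15 steps warm up linearly for steps 1-2 and then decay by 10x once past supports 6 and 10. A sketch (assumes module-internal access, since `getLearningRate` is not public):

let gd = GradientDescent(learningRate: 1e-3)
for step in 1...15 {
  // steps 1-2: warmup (3.3e-4, 6.7e-4); steps 3-6: 1e-3; 7-10: 1e-4; 11-15: 1e-5
  if let lr = gd.getLearningRate(step: step, datasetSize: 100, baseLearningRate: 1e-3) {
    print("step \(step): learning rate \(lr)")
  }
}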