diff --git a/Examples/OISTVisualizationTool/main.swift b/Examples/OISTVisualizationTool/main.swift index fab3a70c..51a9a826 100644 --- a/Examples/OISTVisualizationTool/main.swift +++ b/Examples/OISTVisualizationTool/main.swift @@ -44,9 +44,9 @@ struct ViewFrame: ParsableCommand { let image = dataset.loadFrame(frameRawId)! - plot(image, boxes: dataset.labels[frameId].enumerated().map { - (String($0), $1.location) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: dataset.labels[frameId].enumerated().map { + // (String($0), $1.location) + // }, margin: 10.0, scale: 0.5).show() } } @@ -118,7 +118,7 @@ struct RawTrack: ParsableCommand { var tracker = makeRawPixelTracker(frames: videos, target: startPatch) - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // For LM Optimizer: if verbose { tracker.optimizer.verbosity = .SUMMARY } let prediction = tracker.infer(knownStart: Tuple1(startPose)) @@ -150,9 +150,9 @@ struct RawTrack: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { @@ -207,11 +207,11 @@ struct PpcaTrack: ParsableCommand { startTimer("MAKE_GRAPH") var tracker = makePPCATracker(model: ppca, statistics: statistics, frames: videos, targetSize: (40, 70)) stopTimer("MAKE_GRAPH") + // For LM Optimizer + // if verbose { tracker.optimizer.verbosity = .SUMMARY } - if verbose { tracker.optimizer.verbosity = .SUMMARY } - - tracker.optimizer.cgls_precision = 1e-6 - tracker.optimizer.precision = 1e-2 + // tracker.optimizer.cgls_precision = 1e-6 + // tracker.optimizer.precision = 1e-2 startTimer("GRAPH_INFER") let prediction = tracker.infer(knownStart: Tuple2(startPose, Vector10(flatTensor: startLatent))) @@ -255,9 +255,9 @@ struct PpcaTrack: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { @@ -358,11 +358,12 @@ struct NaiveRae: ParsableCommand { stopTimer("MAKE_GRAPH") if verbose { print("Starting Optimization...") } - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // For LM Optimizer + // if verbose { tracker.optimizer.verbosity = .SUMMARY } - tracker.optimizer.cgls_precision = 1e-7 - tracker.optimizer.precision = 1e-4 - tracker.optimizer.max_iteration = 200 + // tracker.optimizer.cgls_precision = 1e-7 + // tracker.optimizer.precision = 1e-4 + // tracker.optimizer.max_iteration = 200 startTimer("GRAPH_INFER") let prediction = tracker.infer(knownStart: Tuple1(startPose)) @@ -402,9 +403,9 @@ struct NaiveRae: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { @@ -578,11 +579,12 @@ struct NaivePca: ParsableCommand { stopTimer("MAKE_GRAPH") if verbose { print("Starting Optimization...") } - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // For LM Optimizer + // if verbose { tracker.optimizer.verbosity = .SUMMARY } - tracker.optimizer.cgls_precision = 1e-7 -
tracker.optimizer.precision = 1e-4 - tracker.optimizer.max_iteration = 200 + // tracker.optimizer.cgls_precision = 1e-7 + // tracker.optimizer.precision = 1e-4 + // tracker.optimizer.max_iteration = 200 startTimer("GRAPH_INFER") let prediction = tracker.infer(knownStart: Tuple1(startPose)) @@ -622,9 +624,9 @@ struct NaivePca: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { diff --git a/Package.resolved b/Package.resolved index 03e8fe88..b25ddd9e 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,15 +1,6 @@ { "object": { "pins": [ - { - "package": "CSV.swift", - "repositoryURL": "https://github.com/yaslab/CSV.swift.git", - "state": { - "branch": null, - "revision": "81d2874c51db364d7e1d71b0d99018a294c87ac1", - "version": "2.4.3" - } - }, { "package": "Penguin", "repositoryURL": "https://github.com/saeta/penguin.git", @@ -19,15 +10,6 @@ "version": null } }, - { - "package": "Plotly", - "repositoryURL": "https://github.com/vojtamolda/Plotly.swift", - "state": { - "branch": null, - "revision": "6e80119ba37b913e5460459556e2bf58f02eba67", - "version": "0.4.0" - } - }, { "package": "swift-argument-parser", "repositoryURL": "https://github.com/apple/swift-argument-parser.git", @@ -46,24 +28,6 @@ "version": "0.1.0" } }, - { - "package": "swift-models", - "repositoryURL": "https://github.com/tensorflow/swift-models.git", - "state": { - "branch": null, - "revision": "b2fc0325bf9d476bf2d7a4cd0a09d36486c506e4", - "version": null - } - }, - { - "package": "SwiftProtobuf", - "repositoryURL": "https://github.com/apple/swift-protobuf.git", - "state": { - "branch": null, - "revision": "da9a52be9cd36c63993291ce3f1b65dafcd1e826", - "version": "1.14.0" - } - }, { "package": "swift-tools-support-core", "repositoryURL": "https://github.com/apple/swift-tools-support-core.git", diff --git a/Package.swift b/Package.swift index e7dc1074..762940f8 100644 --- a/Package.swift +++ b/Package.swift @@ -30,7 +30,6 @@ let package = Package( .package(name: "TensorBoardX", url: "https://github.com/ProfFan/tensorboardx-s4tf.git", from: "0.1.3"), .package(url: "https://github.com/apple/swift-tools-support-core.git", .branch("swift-5.2-branch")), .package(url: "https://github.com/apple/swift-argument-parser.git", from: "0.3.0"), - .package(name: "Plotly", url: "https://github.com/vojtamolda/Plotly.swift", from: "0.4.0"), ], targets: [ // Targets are the basic building blocks of a package. A target can define a module or a test suite. 
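Since Plotly.swift is dropped from the dependencies here and from the targets below, the `plot(...).show()` calls in main.swift lose their rendering backend, which is presumably why they are commented out above rather than ported. If those visualizations are still wanted, a matplotlib-over-PythonKit helper in the style the Scripts already use could stand in. A minimal sketch — the `plotBoxes` name, the `Tensor<Float>` frame type, and the corner-anchored rotation are my assumptions, not part of this PR:

```swift
import PythonKit
import SwiftFusion  // OrientedBoundingBox, Pose2
import TensorFlow   // Tensor

/// Hypothetical stand-in for the removed Plotly-based `plot(...)`: draws a
/// grayscale frame and overlays labeled oriented boxes with matplotlib.
func plotBoxes(frame: Tensor<Float>, boxes: [(String, OrientedBoundingBox)]) -> PythonObject {
  let plt = Python.import("matplotlib.pyplot")
  let np = Python.import("numpy")
  let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 8])).tuple2
  ax.imshow(np.squeeze(frame.makeNumpyArray()) / 255.0, cmap: "gray")
  for (label, box) in boxes {
    // Rectangle is anchored at a corner; the box stores its center pose.
    let x = box.center.t.x - Double(box.cols) / 2
    let y = box.center.t.y - Double(box.rows) / 2
    let rect = plt.Rectangle(
      Python.tuple([x, y]), Double(box.cols), Double(box.rows),
      // matplotlib rotates about the anchor corner, not the center —
      // an approximation that is fine for a quick overlay.
      angle: box.center.rot.theta * 180 / Double.pi, fill: false, color: "g")
    ax.add_patch(rect)
    ax.annotate(label, xy: Python.tuple([x, y]), color: "g")
  }
  return fig
}
```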
@@ -57,7 +56,6 @@ let package = Package( name: "BeeDataset", dependencies: [ "SwiftFusion", - "Plotly", "ModelSupport", ]), .target( @@ -86,7 +84,6 @@ let package = Package( "BeeTracking", .product(name: "PenguinParallelWithFoundation", package: "Penguin"), "SwiftFusion", - "Plotly", .product(name: "ArgumentParser", package: "swift-argument-parser"), ], path: "Examples/OISTVisualizationTool"), @@ -97,7 +94,6 @@ let package = Package( "BeeTracking", .product(name: "PenguinParallelWithFoundation", package: "Penguin"), "SwiftFusion", - "Plotly", .product(name: "ArgumentParser", package: "swift-argument-parser"), ], path: "Scripts", diff --git a/Scripts/Andrew01.swift b/Scripts/Andrew01.swift index fdcf8297..051aabb1 100644 --- a/Scripts/Andrew01.swift +++ b/Scripts/Andrew01.swift @@ -11,14 +11,11 @@ import PenguinStructures /// Andrew01: RAE Tracker struct Andrew01: ParsableCommand { - @Option(help: "Run on track number x") - var trackId: Int = 0 - @Option(help: "Run for number of frames") var trackLength: Int = 80 @Option(help: "Size of feature space") - var featureSize: Int = 5 + var featureSize: Int = 256 @Option(help: "Pretrained weights") var weightsFile: String? @@ -27,10 +24,12 @@ struct Andrew01: ParsableCommand { // Make sure you have a folder `Results/andrew01` before running func run() { let np = Python.import("numpy") - let kHiddenDimension = 100 + let kHiddenDimension = 512 let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + + var rae = DenseRAE( imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize @@ -42,20 +41,25 @@ struct Andrew01: ParsableCommand { rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) } + // let (imageHeight, imageWidth, imageChannels) = + // (40, 70, 1) + + // let rp = RandomProjection(fromShape: TensorShape([imageHeight, imageWidth, imageChannels]), toFeatureSize: featureSize) + + let trainingDatasetSize = 100 + let dataDir = URL(fileURLWithPath: "./OIST_Data") - let data = OISTBeeVideo(directory: dataDir, length: 100)! - let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! 
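+    // The split these two lines encode, as I read OISTBeeVideo(afterIndex:length:):
+    // frames [0, trainingDatasetSize) train the appearance model, and frames
+    // [trainingDatasetSize, trainingDatasetSize + trackLength) are only ever
+    // tracked, so training and evaluation frames stay disjoint.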
let trackerEvaluation = TrackerEvaluationDataset(testData) - let evalTracker: Tracker = {frames, start in - let trainingDatasetSize = 100 var tracker = trainProbabilisticTracker( trainingData: data, encoder: rae, frames: frames, boundingBoxSize: (40, 70), - withFeatureSize: 5, + withFeatureSize: featureSize, fgRandomFrameCount: trainingDatasetSize, bgRandomFrameCount: trainingDatasetSize ) @@ -64,32 +68,36 @@ return track } + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 1 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") - var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: 5, deltaAnchor: 175, outputFile: "andrew01") - - - for (index, value) in results.sequences.prefix(5).enumerated() { + for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { var i: Int = 0 zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { - let (fig, axes) = plotPatchWithGT(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center) + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) fig.savefig("Results/andrew01/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight") + plt.close("all") i = i + 1 } - let plt = Python.import("matplotlib.pyplot") + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") value.subsequences.map { - plotTrajectory( - track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0], - withTrackColors: plt.cm.jet, withGtColors: plt.cm.gray + plotPoseDifference( - wait + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] ) } plotOverlap( metrics: value.subsequences.first!.metrics, on: axes[1] ) - fig.savefig("Results/andrew01/andrew01_subsequence\(index).pdf", bbox_inches: "tight") + fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") @@ -103,4 +111,4 @@ fileprivate func unpack(_ t: Tuple2) -> (A, B) { /// Returns `t` as a Swift tuple. fileprivate func unpack(_ t: Tuple1) -> (A) { return (t.head) -} \ No newline at end of file +} diff --git a/Scripts/Andrew05.swift b/Scripts/Andrew05.swift new file mode 100644 index 00000000..cedd68b5 --- /dev/null +++ b/Scripts/Andrew05.swift @@ -0,0 +1,120 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Andrew05: Compares saved SiamMask and RAE tracker runs +struct Andrew05: ParsableCommand { + + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Size of feature space") + var featureSize: Int = 256 + + @Option(help: "Pretrained weights") + var weightsFile: String?
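Worth noting before `run()`: Andrew05 does not run a tracker at all — both closures below replay tracks that earlier runs serialized to JSON (Andrew06 writes `prediction_bigtransfer_sequence_<i>.json` the same way). The round-trip contract, pulled out of those call sites; the helper names are mine, but the encode/decode calls are exactly the ones used in these scripts, which also require `OrientedBoundingBox` to be `Codable`:

```swift
import Foundation
import SwiftFusion  // OrientedBoundingBox

/// Serialize one predicted track the way Andrew06/Brando03 do.
func saveTrack(_ track: [OrientedBoundingBox], to path: String) throws {
  let data = try JSONEncoder().encode(track)
  FileManager.default.createFile(atPath: path, contents: data, attributes: nil)
}

/// Load a previously saved track the way Andrew05's eval closures do.
func loadTrack(from path: String) throws -> [OrientedBoundingBox] {
  let data = try Data(contentsOf: URL(fileURLWithPath: path))
  return try JSONDecoder().decode([OrientedBoundingBox].self, from: data)
}
```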
+ + + // Comparison SiamMask and RAE + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let pickle = Python.import("pickle") + + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let testData2 = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + + let trackerEvaluation = TrackerEvaluationDataset(testData) + + var i = 0 + let evalTrackerSiam: Tracker = {frames, start in + let decoder = JSONDecoder() + let trackPath = "Results/brando03/prediction_siammask_sequence_\(i).json" + let decodedTrack = try! decoder.decode([OrientedBoundingBox].self, from: Data(contentsOf: URL(fileURLWithPath: trackPath))) + i = i + 1 + return decodedTrack + } + let sequenceCount = 19 + + var results_siam = trackerEvaluation.evaluate(evalTrackerSiam, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew02") + i = 0 + let trackerEvaluation2 = TrackerEvaluationDataset(testData2) + let evalTrackerRae: Tracker = {frames, start in + let decoder = JSONDecoder() + let trackPath = "rae_256_updated_preds/prediction_rae_256_sequence_\(i).json" + let decodedTrack = try! decoder.decode([OrientedBoundingBox].self, from: Data(contentsOf: URL(fileURLWithPath: trackPath))) + i = i + 1 + return decodedTrack + } + var results_rae = trackerEvaluation2.evaluate(evalTrackerRae, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") + + + + + // for (index, value) in results_rae.sequences.prefix(sequenceCount).enumerated() { + for j in 0...sequenceCount-1 { + let value_rae = results_rae.sequences.prefix(sequenceCount)[j] + let index = j + let value_siam = results_siam.sequences.prefix(sequenceCount)[j] + let value = value_rae + var i: Int = 0 + // zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + // fig.savefig("Results/brando03/sequence\(index)/brando03\(i).png", bbox_inches: "tight") + // plt.close("all") + // i = i + 1 + // } + zip(value_rae.subsequences.first!.frames, zip(zip(value_rae.subsequences.first!.prediction,value_siam.subsequences.first!.prediction), value_rae.subsequences.first!.groundTruth)).map { + // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + let (fig, _) = plotFrameWithPatches2(frame: $0.0, actual_box1: $0.1.0.0, actual_box2: $0.1.0.1, expected: $0.1.1.center, firstGroundTruth: value_rae.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/andrew01/sequence\(index)/comparison_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + // let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + // fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value_rae.subsequences.first!.metrics.accuracy)) and Robustness \(value_rae.subsequences.first!.metrics.robustness).") + // print("First Ground Truth") + // value_rae.subsequences.map { + // print($0.prediction.first!) 
+ // $0.prediction.map{print("\(round($0.center.t.x)) \(round($0.center.t.y)) \($0.center.rot.theta) \(40) \(70)")} + + // plotPoseDifference( + // track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + // ) + // } + // plotOverlap( + // metrics: value_rae.subsequences.first!.metrics, on: axes[1] + // ) + // fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value_rae.sequenceMetrics.accuracy) with Robustness of \(value_rae.sequenceMetrics.robustness)") + } + + // print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + // let f = Python.open("Results/EAO/rae_em_\(featureSize).data", "wb") + // pickle.dump(results.expectedAverageOverlap.curve, f) + + + } + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Andrew06.swift b/Scripts/Andrew06.swift new file mode 100644 index 00000000..337d4d4e --- /dev/null +++ b/Scripts/Andrew06.swift @@ -0,0 +1,362 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import ModelSupport + +import PenguinStructures + +let tf = Python.import("tensorflow") +let np = Python.import("numpy") +let pickle = Python.import("pickle") + +// Optional to enable GPU training +// let _ = _ExecutionContext.global +// let device = Device.defaultXLA +let device = Device.default +let modelName = "BiT-M-R50x1" +var knownModels = [String: String]() +let knownDatasetSizes:[String: (Int, Int)] = [ + "bee_dataset": (40, 70) +] + +public struct LabeledData { + /// The `data` of our sample (usually used as input for a model). + public let data: Data + /// The `label` of our sample (usually used as target for a model). + public let label: Label + + /// Creates an instance from `data` and `label`. + public init(data: Data, label: Label) { + self.data = data + self.label = label + } +} + +// Script to train and track with Big Transfer +struct Andrew06: ParsableCommand { + /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the + /// knownDatasetSizes dictionary + enum DatasetNotFoundError: Error { + case invalidInput(String) + } + + /// Return relevent ResNet enumerated type based on weights loaded + /// + /// - Parameters: + /// - modelName: the name of the model pulled from the big transfer repository + /// to grab the enumerated type for + /// - Returns: ResNet enumerated type for BigTransfer model + func getModelUnits(modelName: String) -> BigTransfer.Depth { + if modelName.contains("R50") { + return .resNet50 + } + else if modelName.contains("R101") { + return .resNet101 + } + else { + return .resNet152 + } + } + + /// Get updated image resolution based on the specifications in BiT-Hyperrule + /// + /// - Parameters: + /// - originalResolution: the source resolution for the current image dataset + /// - Returns: new resolution for images based on BiT-Hyperrule + func getResolution(originalResolution: (Int, Int)) -> (Int, Int) { + let area = originalResolution.0 * originalResolution.1 + return area < 96*96 ? 
(160, 128) : (512, 480) + } + + /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary + /// + /// - Parameters: + /// - datasetName: name of the current dataset you are using + /// - Returns: new resolution for specified dataset + /// - Throws: + /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary + func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) { + if let resolution = knownDatasetSizes[datasetName] { + return getResolution(originalResolution: resolution) + } + print("Unsupported dataset " + datasetName + ". Add your own here :)") + throw DatasetNotFoundError.invalidInput(datasetName) + + } + + /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: mixup alpha based on number of images + func getMixUp(datasetSize: Int) -> Double { + return datasetSize < 20000 ? 0.0 : 0.1 + } + + /// Get the learning rate schedule based on the dataset size + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: learning rate schedule based on the current dataset + func getSchedule(datasetSize: Int) -> Array { + if datasetSize < 20000{ + return [100, 200, 300, 400, 500] + } + else if datasetSize < 500000 { + return [500, 3000, 6000, 9000, 10000] + } + else { + return [500, 6000, 12000, 18000, 20000] + } + } + + /// Get learning rate at the current step given the dataset size and base learning rate + /// + /// - Parameters: + /// - step: current training step + /// - datasetSize: number of images in the dataset + /// - baseLearningRate: starting learning rate to modify + /// - Returns: learning rate at the current step in training + func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? { + let supports = getSchedule(datasetSize: datasetSize) + // Linear warmup + if step < supports[0] { + return baseLearningRate * Float(step) / Float(supports[0]) + } + // End of training + else if step >= supports.last! { + return nil + } + // Staircase decays by factor of 10 + else { + var baseLearningRate = baseLearningRate + for s in supports[1...] { + if s < step { + baseLearningRate = baseLearningRate / 10.0 + } + } + return baseLearningRate + } + } + public typealias Datum = (patch: Tensor, label: Tensor) + public typealias LabeledImage = LabeledData, Tensor> + public typealias Batches = Slices, label: Tensor)], ArraySlice>> + + func getTrainingDataBigTransfer( + from dataset: OISTBeeVideo, + numberForeground: Int = 10000, + numberBackground: Int = 10000 + ) -> [Datum] { + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(0)) + } + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(1)) + } + + var boxes = fgBoxes + bgBoxes + return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)} + } + + /// Stores the training statistics for the BigTransfer training process which are different than usual + /// because the mixedup labels must be accounted for while running training statistics. 
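+  /// With mixup effectively disabled here (BiT-Hyperrule sets alpha = 0 for
+  /// datasets under 20k images) the probability labels are plain one-hot rows,
+  /// so `labels.argmax(squeezingAxis: 1)` in `update` recovers the class index
+  /// exactly; with mixup on it would pick each mixed pair's dominant class.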
+ struct BigTransferTrainingStatistics { + var correctGuessCount = Tensor(0, on: Device.default) + var totalGuessCount = Tensor(0, on: Device.default) + var totalLoss = Tensor(0, on: Device.default) + var batches: Int = 0 + var accuracy: Float { + Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100 + } + var averageLoss: Float { totalLoss.scalarized() / Float(batches) } + + init(on device: Device = Device.default) { + correctGuessCount = Tensor(0, on: device) + totalGuessCount = Tensor(0, on: device) + totalLoss = Tensor(0, on: device) + } + + mutating func update(logits: Tensor, labels: Tensor, loss: Tensor) { + let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1) + correctGuessCount += Tensor(correct).sum() + totalGuessCount += Int32(labels.shape[0]) + totalLoss += loss + batches += 1 + } + } + + fileprivate func makeBatch( + samples: BatchSamples, device: Device) -> LabeledImage where BatchSamples.Element == (patch: Tensor, label: Tensor) { + let labels = Tensor(samples.map(\.label)) + let imageTensor = Tensor(samples.map(\.patch)) + return LabeledImage(data: imageTensor, label: labels) +} + // Train Big Transfer + func run() { + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)! + let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)! + + let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 20000, numberBackground: 20000) + let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600) + + + let classCount = 2 + var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName) + let dataCount = 40000 + + var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9) + optimizer = SGD(copying: optimizer, to: device) + + print("Beginning training...") + var batchSize: Int = 16 + var currStep: Int = 1 + let lrSupports = getSchedule(datasetSize: dataCount) + let scheduleLength = lrSupports.last! 
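+    // The two lines below work out to (my arithmetic, not PR output):
+    // stepsPerEpoch = 40_000 / 16 = 2_500, and the BiT-Hyperrule schedule for
+    // datasets under 500k images ends at step 10_000, so epochCount =
+    // 10_000 / 2_500 = 4 full passes over the training patches.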
+ let stepsPerEpoch = dataCount / batchSize + let epochCount = scheduleLength / stepsPerEpoch + let resizeSize = getResolution(originalResolution: (40, 70)) + + let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map { + (batches: Batches) -> LazyMapSequence in + return batches.lazy.map{ makeBatch(samples: $0, device: device) } + } + + let validationData = validation.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device) + } + + for (epoch, batches) in trainingData.prefix(epochCount).enumerated() { + let start = Date() + var trainStats = BigTransferTrainingStatistics(on: device) + var testStats = BigTransferTrainingStatistics(on: device) + + Context.local.learningPhase = .training + for batch in batches { + if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) { + optimizer.learningRate = newLearningRate + currStep = currStep + 1 + } + else { + continue + } + + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray()) + var newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + + let images = Tensor(copying: Tensor(numpy: flipped.numpy())!, to: device) + let labels = Tensor(copying: newLabels, to: device) + let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor in + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + trainStats.update(logits: ŷ, labels: labels, loss: loss) + return loss + } + + optimizer.update(&bitModel, along: 𝛁model) + + LazyTensorBarrier() + } + + print("Checking validation statistics...") + Context.local.learningPhase = .inference + for batch in validationData { + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + let images = Tensor(copying: resized, to: device) + let labels = Tensor(copying: newLabels, to: device) + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + LazyTensorBarrier() + testStats.update(logits: ŷ, labels: labels, loss: loss) + } + + print( + """ + [Epoch \(epoch)] \ + Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \ + Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \ + (\(String(format: "%.1f", trainStats.accuracy))%), \ + Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \ + Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \ + (\(String(format: "%.1f", testStats.accuracy))%) \ + seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start))) + """) + } + + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
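+    // Tracing getLearningRate for dataCount = 40_000 (schedule
+    // [500, 3000, 6000, 9000, 10000]; my working, not PR output):
+    //   step < 500        linear warmup, e.g. 0.003 * 250 / 500 = 0.0015
+    //   500 ... 3000      0.003
+    //   3001 ... 6000     0.0003
+    //   6001 ... 9000     0.00003
+    //   9001 ... 9999     0.000003
+    //   step >= 10000     nil, so the batch loop above stops updating weights.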
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + + + let evalTracker: Tracker = {frames, start in + var tracker = makeProbabilisticTracker2( + model: bitModel, + frames: frames, + targetSize: (40, 70) + ) + let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true) + let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols:70) } + return track + + } + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 19 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") + + for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/andrew01/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + let encoder = JSONEncoder() + let data = try! encoder.encode($0.prediction) + FileManager.default.createFile(atPath: "prediction_bigtransfer_sequence_\(index).json", contents: data, attributes: nil) + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + let f = Python.open("Results/EAO/bigtransfer.data", "wb") + pickle.dump(results.expectedAverageOverlap.curve, f) + + + } + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. 
+fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Andrew07.swift b/Scripts/Andrew07.swift new file mode 100644 index 00000000..83eb3ddb --- /dev/null +++ b/Scripts/Andrew07.swift @@ -0,0 +1,342 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import ModelSupport + +import PenguinStructures + +// Error gradient visualization script for Big Transfer +struct Andrew07: ParsableCommand { + /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the + /// knownDatasetSizes dictionary + enum DatasetNotFoundError: Error { + case invalidInput(String) + } + + /// Return relevent ResNet enumerated type based on weights loaded + /// + /// - Parameters: + /// - modelName: the name of the model pulled from the big transfer repository + /// to grab the enumerated type for + /// - Returns: ResNet enumerated type for BigTransfer model + func getModelUnits(modelName: String) -> BigTransfer.Depth { + if modelName.contains("R50") { + return .resNet50 + } + else if modelName.contains("R101") { + return .resNet101 + } + else { + return .resNet152 + } + } + + /// Get updated image resolution based on the specifications in BiT-Hyperrule + /// + /// - Parameters: + /// - originalResolution: the source resolution for the current image dataset + /// - Returns: new resolution for images based on BiT-Hyperrule + func getResolution(originalResolution: (Int, Int)) -> (Int, Int) { + let area = originalResolution.0 * originalResolution.1 + return area < 96*96 ? (160, 128) : (512, 480) + } + + /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary + /// + /// - Parameters: + /// - datasetName: name of the current dataset you are using + /// - Returns: new resolution for specified dataset + /// - Throws: + /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary + func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) { + if let resolution = knownDatasetSizes[datasetName] { + return getResolution(originalResolution: resolution) + } + print("Unsupported dataset " + datasetName + ". Add your own here :)") + throw DatasetNotFoundError.invalidInput(datasetName) + + } + + /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: mixup alpha based on number of images + func getMixUp(datasetSize: Int) -> Double { + return datasetSize < 20000 ? 
0.0 : 0.1 + } + + /// Get the learning rate schedule based on the dataset size + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: learning rate schedule based on the current dataset + func getSchedule(datasetSize: Int) -> Array { + if datasetSize < 20000{ + return [100, 200, 300, 400, 500] + } + else if datasetSize < 500000 { + return [500, 3000, 6000, 9000, 10000] + } + else { + return [500, 6000, 12000, 18000, 20000] + } + } + + /// Get learning rate at the current step given the dataset size and base learning rate + /// + /// - Parameters: + /// - step: current training step + /// - datasetSize: number of images in the dataset + /// - baseLearningRate: starting learning rate to modify + /// - Returns: learning rate at the current step in training + func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? { + let supports = getSchedule(datasetSize: datasetSize) + // Linear warmup + if step < supports[0] { + return baseLearningRate * Float(step) / Float(supports[0]) + } + // End of training + else if step >= supports.last! { + return nil + } + // Staircase decays by factor of 10 + else { + var baseLearningRate = baseLearningRate + for s in supports[1...] { + if s < step { + baseLearningRate = baseLearningRate / 10.0 + } + } + return baseLearningRate + } + } + public typealias Datum = (patch: Tensor, label: Tensor) + public typealias LabeledImage = LabeledData, Tensor> + public typealias Batches = Slices, label: Tensor)], ArraySlice>> + + func getTrainingDataBigTransfer( + from dataset: OISTBeeVideo, + numberForeground: Int = 10000, + numberBackground: Int = 10000 + ) -> [Datum] { + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(0)) + } + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(1)) + } + + var boxes = fgBoxes + bgBoxes + return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)} + } + + /// Stores the training statistics for the BigTransfer training process which are different than usual + /// because the mixedup labels must be accounted for while running training statistics. 
+ struct BigTransferTrainingStatistics { + var correctGuessCount = Tensor(0, on: Device.default) + var totalGuessCount = Tensor(0, on: Device.default) + var totalLoss = Tensor(0, on: Device.default) + var batches: Int = 0 + var accuracy: Float { + Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100 + } + var averageLoss: Float { totalLoss.scalarized() / Float(batches) } + + init(on device: Device = Device.default) { + correctGuessCount = Tensor(0, on: device) + totalGuessCount = Tensor(0, on: device) + totalLoss = Tensor(0, on: device) + } + + mutating func update(logits: Tensor, labels: Tensor, loss: Tensor) { + let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1) + correctGuessCount += Tensor(correct).sum() + totalGuessCount += Int32(labels.shape[0]) + totalLoss += loss + batches += 1 + } + } + + fileprivate func makeBatch( + samples: BatchSamples, device: Device) -> LabeledImage where BatchSamples.Element == (patch: Tensor, label: Tensor) { + let labels = Tensor(samples.map(\.label)) + let imageTensor = Tensor(samples.map(\.patch)) + return LabeledImage(data: imageTensor, label: labels) +} + // Train Big Transfer + func run() { + let plt = Python.import("matplotlib.pyplot") + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)! + let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)! + + let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 3000, numberBackground: 3000) + let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600) + + + let classCount = 2 + var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName) + let dataCount = 6000 + + var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9) + optimizer = SGD(copying: optimizer, to: device) + + print("Beginning training...") + var batchSize: Int = 16 + var currStep: Int = 1 + let lrSupports = getSchedule(datasetSize: dataCount) + let scheduleLength = lrSupports.last! 
+ let stepsPerEpoch = dataCount / batchSize + let epochCount = scheduleLength / stepsPerEpoch + let resizeSize = getResolution(originalResolution: (40, 70)) + + let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map { + (batches: Batches) -> LazyMapSequence in + return batches.lazy.map{ makeBatch(samples: $0, device: device) } + } + + let validationData = validation.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device) + } + + for (epoch, batches) in trainingData.prefix(epochCount).enumerated() { + let start = Date() + var trainStats = BigTransferTrainingStatistics(on: device) + var testStats = BigTransferTrainingStatistics(on: device) + + Context.local.learningPhase = .training + for batch in batches { + if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) { + optimizer.learningRate = newLearningRate + currStep = currStep + 1 + } + else { + continue + } + + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + //let cropped = tf.image.random_crop(resized.makeNumpyArray(), [batchSize, resizeSize.0, resizeSize.1, 3]) + let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray()) + var newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + + let images = Tensor(copying: Tensor(numpy: flipped.numpy())!, to: device) + let labels = Tensor(copying: newLabels, to: device) + let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor in + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + trainStats.update(logits: ŷ, labels: labels, loss: loss) + return loss + } + + optimizer.update(&bitModel, along: 𝛁model) + + LazyTensorBarrier() + } + + print("Checking validation statistics...") + Context.local.learningPhase = .inference + for batch in validationData { + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + let images = Tensor(copying: resized, to: device) + let labels = Tensor(copying: newLabels, to: device) + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + LazyTensorBarrier() + testStats.update(logits: ŷ, labels: labels, loss: loss) + } + + print( + """ + [Epoch \(epoch)] \ + Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \ + Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \ + (\(String(format: "%.1f", trainStats.accuracy))%), \ + Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \ + Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \ + (\(String(format: "%.1f", testStats.accuracy))%) \ + seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start))) + """) + } + + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
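The visualization that follows raster-scans a 100x100 window of translations around the first ground-truth pose and heat-maps the factor error (min-shifted, then scaled to [0, 255] before `imshow`). Its inner kernel, reduced to a single evaluation with the same calls as the nested loop below — shown here at the ground-truth location itself:

```swift
// Score one candidate pose under the classifier-backed factor: one cell of
// the error surface. firstGroundTruth, firstFrame, and bitModel are the
// values set up in the surrounding run().
let (cx, cy) = (firstGroundTruth.t.x, firstGroundTruth.t.y)
var v = VariableAssignments()
let poseId = v.store(Pose2(firstGroundTruth.rot, Vector2(cx, cy)))
var fg = FactorGraph()
let factorNNC = ProbablisticTrackingFactor2(
  poseId,
  measurement: firstFrame,   // full frame; the factor presumably crops a 40x70 patch at the pose
  classifier: bitModel,
  patchSize: (40, 70),
  appearanceModelSize: (40, 70))
fg.store(factorNNC)
let cellError = factorNNC.errorVector(v[poseId]).x  // the scalar plotted in the heat map
```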
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + let frames = testData.frames + let firstTrack = testData.tracks[0] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + + let range = 100.0 + + let firstGroundTruth = firstObb.center + print("oBB coordinates", firstGroundTruth.t.x, firstGroundTruth.t.y) + + let (fig, axs) = plt.subplots(1,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + axs[0].imshow(fr / 255.0, cmap: "gray") + + + axs[0].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2) + axs[0].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2) + axs[1].set_xlim(0, range) + axs[1].set_ylim(0, range) + + let x = firstGroundTruth.t.x + let y = firstGroundTruth.t.y + + var values = Tensor(zeros: [Int(range), Int(range)]) + + for i in 0...Int(range)-1 { + for j in 0...Int(range)-1 { + let t = Vector2(x-range/2+Double(i), y-range/2+Double(j)) + let p = Pose2(firstGroundTruth.rot, t) + var v = VariableAssignments() + let poseId = v.store(p) + let startpose = v[poseId] + var fg = FactorGraph() + let factorNNC = ProbablisticTrackingFactor2(poseId, + measurement: firstFrame, + classifier: bitModel, + patchSize: (40, 70), + appearanceModelSize: (40, 70) + ) + fg.store(factorNNC) + values[i,j] = Tensor(factorNNC.errorVector(v[poseId]).x) + print(j) + print(i) + } + } + let min_val = values.min() + if Double(min_val)! < 0 { + values = values-min_val + } + values = values/values.max()*255 + print(values[0...,0]) + print(values.shape) + axs[1].imshow(values.makeNumpyArray()) + + fig.savefig("./Results/andrew01/vizual_NNC.png", bbox_inches: "tight") + } + + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. 
+fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Andrew08.swift b/Scripts/Andrew08.swift new file mode 100644 index 00000000..d9c24018 --- /dev/null +++ b/Scripts/Andrew08.swift @@ -0,0 +1,439 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import ModelSupport + +import PenguinStructures + + +struct Andrew08: ParsableCommand { + /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the + /// knownDatasetSizes dictionary + enum DatasetNotFoundError: Error { + case invalidInput(String) + } + func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) { + var v = VariableAssignments() + let poseId = v.store(p) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - p.t.x + let dy = v[poseId].t.y - p.t.y + let dtheta = v[poseId].rot.theta - p.rot.theta + let startpose = v[poseId] + let fg = FactorGraph() + + return (dx, dy, dtheta, startpose, v, poseId, fg) + } + + func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) { + var conv = true + var errors = [Double]() + var xs = [Double]() + var ys = [Double]() + var thetas = [Double]() + return (conv, errors, xs, ys, thetas) + } + /// Return relevent ResNet enumerated type based on weights loaded + /// + /// - Parameters: + /// - modelName: the name of the model pulled from the big transfer repository + /// to grab the enumerated type for + /// - Returns: ResNet enumerated type for BigTransfer model + func getModelUnits(modelName: String) -> BigTransfer.Depth { + if modelName.contains("R50") { + return .resNet50 + } + else if modelName.contains("R101") { + return .resNet101 + } + else { + return .resNet152 + } + } + + /// Get updated image resolution based on the specifications in BiT-Hyperrule + /// + /// - Parameters: + /// - originalResolution: the source resolution for the current image dataset + /// - Returns: new resolution for images based on BiT-Hyperrule + func getResolution(originalResolution: (Int, Int)) -> (Int, Int) { + let area = originalResolution.0 * originalResolution.1 + return area < 96*96 ? (160, 128) : (512, 480) + } + + /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary + /// + /// - Parameters: + /// - datasetName: name of the current dataset you are using + /// - Returns: new resolution for specified dataset + /// - Throws: + /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary + func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) { + if let resolution = knownDatasetSizes[datasetName] { + return getResolution(originalResolution: resolution) + } + print("Unsupported dataset " + datasetName + ". Add your own here :)") + throw DatasetNotFoundError.invalidInput(datasetName) + + } + + /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: mixup alpha based on number of images + func getMixUp(datasetSize: Int) -> Double { + return datasetSize < 20000 ? 
0.0 : 0.1 + } + + /// Get the learning rate schedule based on the dataset size + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: learning rate schedule based on the current dataset + func getSchedule(datasetSize: Int) -> Array { + if datasetSize == 100 { + return [25, 50, 75, 100] + } + if datasetSize < 20000{ + return [100, 200, 300, 400, 500] + } + else if datasetSize < 500000 { + return [500, 3000, 6000, 9000, 10000] + } + else { + return [500, 6000, 12000, 18000, 20000] + } + } + + /// Get learning rate at the current step given the dataset size and base learning rate + /// + /// - Parameters: + /// - step: current training step + /// - datasetSize: number of images in the dataset + /// - baseLearningRate: starting learning rate to modify + /// - Returns: learning rate at the current step in training + func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? { + let supports = getSchedule(datasetSize: datasetSize) + // Linear warmup + print(step) + print(supports) + if step < supports[0] { + return baseLearningRate * Float(step) / Float(supports[0]) + } + // End of training + else if step >= supports.last! { + return nil + } + // Staircase decays by factor of 10 + else { + var baseLearningRate = baseLearningRate + for s in supports[1...] { + if s < step { + baseLearningRate = baseLearningRate / 10.0 + } + } + return baseLearningRate + } + } + public typealias Datum = (patch: Tensor, label: Tensor) + public typealias LabeledImage = LabeledData, Tensor> + public typealias Batches = Slices, label: Tensor)], ArraySlice>> + + func getTrainingDataBigTransfer( + from dataset: OISTBeeVideo, + numberForeground: Int = 10000, + numberBackground: Int = 10000 + ) -> [Datum] { + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(0)) + } + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(1)) + } + + var boxes = fgBoxes + bgBoxes + return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)} + } + + /// Stores the training statistics for the BigTransfer training process which are different than usual + /// because the mixedup labels must be accounted for while running training statistics. 
+ struct BigTransferTrainingStatistics { + var correctGuessCount = Tensor(0, on: Device.default) + var totalGuessCount = Tensor(0, on: Device.default) + var totalLoss = Tensor(0, on: Device.default) + var batches: Int = 0 + var accuracy: Float { + Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100 + } + var averageLoss: Float { totalLoss.scalarized() / Float(batches) } + + init(on device: Device = Device.default) { + correctGuessCount = Tensor(0, on: device) + totalGuessCount = Tensor(0, on: device) + totalLoss = Tensor(0, on: device) + } + + mutating func update(logits: Tensor, labels: Tensor, loss: Tensor) { + let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1) + correctGuessCount += Tensor(correct).sum() + totalGuessCount += Int32(labels.shape[0]) + totalLoss += loss + batches += 1 + } + } + + fileprivate func makeBatch( + samples: BatchSamples, device: Device) -> LabeledImage where BatchSamples.Element == (patch: Tensor, label: Tensor) { + let labels = Tensor(samples.map(\.label)) + let imageTensor = Tensor(samples.map(\.patch)) + return LabeledImage(data: imageTensor, label: labels) +} + // Train Big Transfer + func run() { + let plt = Python.import("matplotlib.pyplot") + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)! + let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)! + + let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 256, numberBackground: 256) + let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600) + + + let classCount = 2 + var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName) + let dataCount = 6000 + + var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9) + optimizer = SGD(copying: optimizer, to: device) + + print("Beginning training...") + var batchSize: Int = 16 + var currStep: Int = 1 + let lrSupports = getSchedule(datasetSize: dataCount) + let scheduleLength = lrSupports.last! 
+ let stepsPerEpoch = dataCount / batchSize + let epochCount = scheduleLength / stepsPerEpoch + let resizeSize = getResolution(originalResolution: (40, 70)) + + let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map { + (batches: Batches) -> LazyMapSequence in + return batches.lazy.map{ makeBatch(samples: $0, device: device) } + } + + let validationData = validation.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device) + } + + for (epoch, batches) in trainingData.prefix(epochCount).enumerated() { + let start = Date() + var trainStats = BigTransferTrainingStatistics(on: device) + var testStats = BigTransferTrainingStatistics(on: device) + + Context.local.learningPhase = .training + for batch in batches { + if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) { + optimizer.learningRate = newLearningRate + currStep = currStep + 1 + } + else { + continue + } + + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray()) + var newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + + let images = Tensor(copying: Tensor(numpy: flipped.numpy())!, to: device) + let labels = Tensor(copying: newLabels, to: device) + let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor in + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + trainStats.update(logits: ŷ, labels: labels, loss: loss) + return loss + } + + optimizer.update(&bitModel, along: 𝛁model) + + LazyTensorBarrier() + } + + print("Checking validation statistics...") + Context.local.learningPhase = .inference + for batch in validationData { + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + let images = Tensor(copying: resized, to: device) + let labels = Tensor(copying: newLabels, to: device) + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + LazyTensorBarrier() + testStats.update(logits: ŷ, labels: labels, loss: loss) + } + + print( + """ + [Epoch \(epoch)] \ + Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \ + Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \ + (\(String(format: "%.1f", trainStats.accuracy))%), \ + Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \ + Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \ + (\(String(format: "%.1f", testStats.accuracy))%) \ + seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start))) + """) + } + + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
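The perturbation study below repeats one pattern many times: perturb the ground-truth pose, attach a single classifier factor, run gradient descent, and check whether the pose lands back inside `xy_thresh`/`theta_thresh`. One trial, sketched with the helpers defined at the top of this file and the `GDOptimizer`/`it_limit` set up just below; the exact `GDOptimizer.update(&v, objective: fg)` spelling is my assumption about SwiftFusion's `GradientDescent` API, inferred from its use here:

```swift
// One perturb-and-descend trial (reviewer sketch, not the PR's loop).
// dx/dy/dtheta record the perturbation that was applied, for logging.
var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center)
let factorNNC = ProbablisticTrackingFactor2(
  poseId, measurement: firstFrame, classifier: bitModel,
  patchSize: (40, 70), appearanceModelSize: (40, 70))
fg.store(factorNNC)
var (conv, errors, xs, ys, thetas) = initialize_empty_arrays()
for _ in 0..<it_limit {
  GDOptimizer.update(&v, objective: fg)  // assumed SwiftFusion API
  errors.append(factorNNC.errorVector(v[poseId]).x)
  xs.append(v[poseId].t.x)
  ys.append(v[poseId].t.y)
  thetas.append(v[poseId].rot.theta)
}
// Success = v[poseId] descended back to within xy_thresh / theta_thresh of
// firstObb.center; compare factorNNC.errorVector(startpose).x with errors.last.
```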
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + let frames = testData.frames + let firstTrack = testData.tracks[0] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + + let lr = 100.0 + var GDOptimizer = GradientDescent(learningRate: lr) + let it_limit = 80 + + + let folderName = "Results/GD_optimization_BiT_lr_\(lr)__10_22_2021_final_images_4subplots" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + print("hello1") + let (fig, axs) = plt.subplots(2,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + for i in 0...1 { + for j in 0...1 { + axs[i,j].imshow(fr / 255.0, cmap: "gray") + let firstGroundTruth = firstObb.center + axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50) + axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50) + axs[i,j].get_xaxis().set_visible(false) + axs[i,j].get_yaxis().set_visible(false) + } + } + axs[0,0].set_title("fabs(theta) < 6 Degrees", fontsize:8) + axs[0,1].set_title("fabs(theta) < 12 Degrees", fontsize:8) + axs[1,0].set_title("fabs(theta) < 16 Degrees", fontsize:8) + axs[1,1].set_title("fabs(theta) >= 16 Degrees", fontsize:8) + + print("hello") + let xy_thresh = 20.0 //pixels + let theta_thresh = 0.5 //radians // consider doing overlap. + + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + + var useClassifier = true + if useClassifier { + var classifier = bitModel + for j in 0...200 { + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + let factorNNC = ProbablisticTrackingFactor2(poseId, + measurement: firstFrame, + classifier: classifier, + patchSize: (40, 70), + appearanceModelSize: (40, 70) + ) + fg.store(factorNNC) + + + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization") + for i in 0.. 
firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 5) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 5) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 5) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 5) + } + + } else { + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 5) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 5) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 5) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 5) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + + final_err = factorNNC.errorVector(v[poseId]).x + label_err = factorNNC.errorVector(firstObb.center).x + start_err = factorNNC.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + } + } +} + + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Brando01.swift b/Scripts/Brando01.swift new file mode 100644 index 00000000..0a4b9340 --- /dev/null +++ b/Scripts/Brando01.swift @@ -0,0 +1,102 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando01: Tracker OpenCV +struct Brando01: ParsableCommand { + func run() { + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: 100)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
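Brando01 wraps OpenCV's MIL tracker in the same `Tracker` closure signature the RAE and BiT scripts use, so `TrackerEvaluationDataset.evaluate` can score all of them identically. The actual typealias lives in BeeTracking; the spelling below is my reading of the call sites in these scripts, and the frame element type in particular is an assumption:

```swift
// Assumed shape of the Tracker typealias used throughout these scripts:
// given a subsequence's frames and the starting box, return one predicted
// box per frame.
typealias Tracker =
  ([Tensor<Float>], OrientedBoundingBox) -> [OrientedBoundingBox]
```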
+ print("number of frames in training data:", data.labels.count) + print("number of frames in testing data", testData.labels.count, "\n\n") + + + let trackerEvaluation = TrackerEvaluationDataset(testData) + + let np = Python.import("numpy") + let cv2 = Python.import("cv2") + + + let evalTracker: Tracker = {frames, start in + + let tracker = cv2.TrackerMIL_create() + + var BB = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-35, 40, 70]) + var smallframe = np.array(frames.first!.makeNumpyArray()) + let leftpt = Python.tuple([Int(start.center.t.x)-35, Int(start.center.t.y)-35]) + let rgtpt = Python.tuple([Int(start.center.t.x)+35, Int(start.center.t.y)+35]) + cv2.rectangle(smallframe, leftpt, rgtpt, Python.tuple([0,150,0]), 5) + cv2.imwrite("./image_new.png", smallframe) + tracker[dynamicMember: "init"](frames.first!.makeNumpyArray(), BB) + var results = [PythonObject]() + for (index, frame) in frames.enumerated() { + var a = tracker[dynamicMember: "update"](frame.makeNumpyArray()).tuple2 + let track_success = a.0 + let newBB = a.1 + if Bool(track_success)! { + results.append(newBB) + } + + + + } + print("printing python BB") + var track = [OrientedBoundingBox]() + for result in results { + let pythonBB = result.tuple4 + let rows = Int(pythonBB.2)! + let cols = Int(pythonBB.3)! + let rot = Rot2(0) + let vect = Vector2(Double(pythonBB.0)!+20, Double(pythonBB.1)!+35) + let center = Pose2(rot, vect) + let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols) + track.append(swiftBB) + } + return track + } + + + + + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 1 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: 1, deltaAnchor: 175, outputFile: "brando01") + for (index, value) in results.sequences.prefix(1).enumerated() { + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/brando01/sequence\(index)/brando01\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + fig.savefig("Results/brando01/brando01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + + } +} \ No newline at end of file diff --git a/Scripts/Brando02.swift b/Scripts/Brando02.swift new file mode 100644 index 00000000..ad027cb2 --- /dev/null +++ b/Scripts/Brando02.swift @@ -0,0 +1,66 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando02 OpenCV tracker 
+struct Brando02: ParsableCommand {
+  func run() {
+    let cv2 = Python.import("cv2")
+    let os = Python.import("os")
+    let image_names = os.listdir("../OIST_Data/downsampled")
+    let track_names = os.listdir("../OIST_Data/tracks")
+    image_names.sort()
+    track_names.sort()
+    let track = track_names[10]
+    let frame = cv2.imread("../OIST_Data/downsampled/" + image_names[0])
+    let centers = Python.list()
+    let fs = Python.open("../OIST_Data/tracks/" + track, "r")
+    let lines = fs.readlines()
+    print("track file has", Python.len(lines), "lines")
+    // The first line of a track file is a header; the rest are "x y ..." rows.
+    var isHeader = true
+    for line in lines {
+      if isHeader {
+        isHeader = false
+        continue
+      }
+      let nums = (String(line) ?? "").components(separatedBy: " ")
+      guard nums.count >= 2, let width = Float(nums[0]), let height = Float(nums[1]) else { continue }
+      centers.append(Python.tuple([Python.float(width), Python.float(height)]))
+    }
+
+    let width = Float(centers[0][0]) ?? 0
+    let height = Float(centers[0][1]) ?? 0
+    let BB = Python.tuple([Int(width - 35), Int(height - 35), 70, 70])
+    let tracker = cv2.TrackerMIL_create()
+    // `init` is a reserved word in Swift, so the Python method is reached
+    // through the dynamic-member subscript.
+    tracker[dynamicMember: "init"](frame, BB)
+    var results = [PythonObject]()
+    for image_name in image_names {
+      let framei = cv2.imread("../OIST_Data/downsampled/" + image_name)
+      let (track_success, newBB) = tracker[dynamicMember: "update"](framei).tuple2
+      if Bool(track_success)! {
+        results.append(newBB)
+      }
+    }
+    // `results` now holds one axis-aligned box per successfully tracked frame;
+    // this script stops here, leaving plotting and evaluation to Brando01.
+  }
+}
\ No newline at end of file
diff --git a/Scripts/Brando03.swift b/Scripts/Brando03.swift
new file mode 100644
index 00000000..cc6b4f3b
--- /dev/null
+++ b/Scripts/Brando03.swift
@@ -0,0 +1,206 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando03: SiamMask tracker
+struct Brando03: ParsableCommand {
+
+  func run() {
+
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!
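+    // The `SiamMask.*` modules below are imported through PythonKit, which
+    // assumes the SiamMask checkout is already importable. If it is not on the
+    // Python path, something like this (hypothetical layout) is needed first:
+    //   let sys = Python.import("sys")
+    //   sys.path.append("../SiamMask")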
+ print("number of frames in testing data", testData.labels.count, "\n\n") + + + let trackerEvaluation = TrackerEvaluationDataset(testData) + let os = Python.import("os") + let torch = Python.import("torch") + + let np = Python.import("numpy") + let smtools = Python.import("SiamMask.tools") + let smutils = Python.import("SiamMask.utils") + let cfhelper = Python.import("SiamMask.utils.config_helper") + let ldhelper = Python.import("SiamMask.utils.load_helper") + let smtest = Python.import("SiamMask.tools.test") + + + let cv2 = Python.import("cv2") + + let argparse = Python.import("argparse") + let parser = argparse.ArgumentParser() + + parser.add_argument("--resume") + parser.add_argument("--config") + parser.add_argument("--base_path") + let args = parser.parse_args(["--resume", "../SiamMask/model_sharp/checkpoint_e20.pth", "--config", "../SiamMask/experiments/siammask_sharp/config_vot.json", "--base_path", "./OIST_Data/downsampled"]) + + print("ARGUMENTS", args) + + + print(Python.version) + print("hello") + let evalTracker: Tracker = { frames, start in + + //SIAM MASK TRACKER IS HERE + let device = torch.device("cpu") + torch.backends.cudnn.benchmark = true + + // # Setup Model + let cfg = cfhelper.load_config(args) + let custom = Python.import("SiamMask.experiments.siammask_sharp.custom") + var siammask = custom.Custom(anchors: cfg["anchors"]) + siammask = ldhelper.load_pretrain(siammask, args.resume) + + siammask.eval().to(device) + let init_rect = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-20, 40, 70]) + let tup = init_rect.tuple4 + let x = tup.0 + let y = tup.1 + let w = tup.2 + let h = tup.3 + + var state: PythonObject = 0 + var results = [PythonObject]() + + for (f, im) in frames.enumerated() { + + let im_np = im.makeNumpyArray() + let im_3d = np.squeeze(np.stack(Python.tuple([im_np, im_np, im_np]), axis: 2)) + + if f == 0 { // init + let target_pos = np.array([x + w / 2, y + h / 2]) + let target_sz = np.array([w, h]) + state = smtest.siamese_init(im_3d, target_pos, target_sz, siammask, cfg["hp"], device: device) //# init tracker + results.append(Python.tuple([Int(x + w / 2)!, Int(y + h / 2)!])) + } else if f > 0 { //# tracking + state = smtest.siamese_track(state, im_3d, mask_enable: true, refine_enable: true, device: device) //# track + let location = state["ploygon"].flatten() + + + results.append(location) + + + } + + } + + var track = [OrientedBoundingBox]() + for (i, result) in results.enumerated() { + if i > 0 { + let location = result + let centx = Int((location[0]+location[2]+location[4]+location[6])/4)! + let centy = Int((location[1]+location[3]+location[5]+location[7])/4)! + let dx1 = location[0]-location[2] + let dy1 = location[1]-location[3] + let dx2 = location[0]-location[6] + let dy2 = location[1]-location[7] + let dist1 = sqrt(pow(Double(dx1)!, 2) + pow(Double(dy1)!, 2)) + let dist2 = (pow(Double(dx2)!, 2) + pow(Double(dy2)!, 2)).squareRoot() + let locx: Int + let locy: Int + let rows: Int + let cols: Int + if dist1 < dist2 { + locx = Int((location[0]+location[2])/2)! + locy = Int((location[1]+location[3])/2)! + rows = Int(dist1) + cols = Int(dist2) + } else { + locx = Int((location[0]+location[6])/2)! + locy = Int((location[1]+location[7])/2)! 
+ rows = Int(dist2) + cols = Int(dist1) + } + let dx = Double(abs(locx - centx)) + let dy = Double(abs(locy - centy)) + var theta = Double.pi/2 + print("polygon", result) + print("center", centx, centy) + print("dx and dy", dx, dy) + print("theta initial", theta) + if dx != 0 { + theta = atan(dy/dx) + } + + if locx >= centx && locy < centy{ + theta = -theta + } else if locx < centx && locy >= centy{ + theta = .pi - theta + } else if locx < centx && locy < centy{ + theta = .pi + theta + } + print("theta final", theta) + + let rot = Rot2(theta) + let vect = Vector2(Double(centx), Double(centy)) + print("rotation", rot, "\n\n") + let center = Pose2(rot, vect) + let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols) + track.append(swiftBB) + } else { + let swiftBB = start + track.append(swiftBB) + } + } + return track + } + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 20 + var eval_results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "brando03") + print("done evaluating") + var total_overlap = eval_results.sequences.prefix(sequenceCount)[0].subsequences.first!.metrics.overlap + + for (index, value) in eval_results.sequences.prefix(sequenceCount).enumerated() { + + print("done,", index) + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + value.subsequences.map { + //zip($0.prediction, $0.groundTruth).enumerated().map{($0.0, $0.1.0.center, $0.1.1.center)}) + let encoder = JSONEncoder() + let data = try! encoder.encode($0.prediction) + FileManager.default.createFile(atPath: "Results/brando03/prediction_siammask_sequence_\(index).json", contents: data, attributes: nil) + } + value.subsequences.map { + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + + fig.savefig("Results/brando03/brando03_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + print("Accuracy for all sequences is \(eval_results.trackerMetrics.accuracy) with Robustness of \(eval_results.trackerMetrics.robustness)") + + let pickle = Python.import("pickle"); + let f = Python.open("Results/EAO/siammask.data", "wb") + pickle.dump(eval_results.expectedAverageOverlap.curve, f) + + + // var average_overlap = [Double]() + // for (i, val) in total_overlap.enumerated() { + // average_overlap.append(val/Double(sequenceCount)) + // } + // let (fig, ax) = plt.subplots().tuple2 + // ax.plot(average_overlap) + // ax.set_title("Overlap") + // fig.savefig("average_overlap.png") + + + + + + } + +} \ No newline at end of file diff --git a/Scripts/Brando04.swift b/Scripts/Brando04.swift new file mode 100644 index 00000000..d1354f30 --- /dev/null +++ b/Scripts/Brando04.swift @@ -0,0 +1,108 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + + + +/// Brando04: NNClassifier training +struct Brando04: ParsableCommand { + typealias LikelihoodModel = TrackingLikelihoodModel + + + @Flag(help: "Training mode") + var training: Bool = false + + let 
num_boxes: Int = 10000
+
+  // Returns (labels, patches) for background training data; background is
+  // class 0. (The scalar types below are assumptions; the generic parameters
+  // were lost from the original hunk.)
+  func getTrainingDataBG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor<Double>, Tensor<Float>) {
+    print("bg")
+    let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+      $0.frame!.patch(at: $0.obb)
+    }
+    let labels = Tensor<Double>(zeros: [num_boxes])
+    let patches = Tensor<Float>(stacking: bgBoxes)
+    print("background labels and patches done")
+    return (labels, patches)
+  }
+
+  // Returns (labels, patches) for foreground training data; foreground is
+  // class 1, so the labels here are ones (the original zeros would have
+  // collapsed both classes onto the same label).
+  func getTrainingDataFG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor<Double>, Tensor<Float>) {
+    print("fg")
+    let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+      $0.frame!.patch(at: $0.obb)
+    }
+    let labels = Tensor<Double>(ones: [num_boxes])
+    let patches = Tensor<Float>(stacking: fgBoxes)
+    print("foreground labels and patches done")
+    return (labels, patches)
+  }
+
+  // Trains NNClassifier weights on foreground/background patches and saves
+  // them under `classifiers/classifiers_today`, skipping weight files that
+  // already exist.
+  func run() {
+    let folderName = "classifiers/classifiers_today"
+    if !FileManager.default.fileExists(atPath: folderName) {
+      do {
+        try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+      } catch {
+        print(error.localizedDescription)
+      }
+    } else {
+      print("folder exists")
+    }
+
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
+    let (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset)
+    let (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset)
+
+    let patches = Tensor<Float>(stacking: patches_bg.unstacked() + patches_fg.unstacked())
+    let labels = labels_bg.concatenated(with: labels_fg)
+    print("shape of patches", patches.shape)
+    print("shape of labels", labels.shape)
+
+    let kHiddenDimension = 512
+    let featSize = 512
+    let iterations = [5, 6, 7]
+
+    for i in iterations {
+      let path = "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i)_60000boxes_600epochs.npy"
+      if FileManager.default.fileExists(atPath: path) {
+        print("File Already Exists. Abort training")
+        continue
+      }
+      print("Training...")
+      let classifier: PretrainedNNClassifier = PretrainedNNClassifier(
+        patches: patches,
+        labels: labels,
+        given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: "", learningRate: 1e-3),
+        train_mode: "from_scratch"
+      )
+      classifier.save(to: path)
+    }
+  }
+}
diff --git a/Scripts/Brando05.swift b/Scripts/Brando05.swift
new file mode 100644
index 00000000..b8a30e9e
--- /dev/null
+++ b/Scripts/Brando05.swift
@@ -0,0 +1,119 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando05: TRACKING with NN Classifier
+struct Brando05: ParsableCommand {
+  @Option(help: "Run for number of frames")
+  var trackLength: Int = 80
+
+  // Runs the NNClassifier tracker on n sequences and outputs relevant images and statistics
+  func run() {
+    let np = Python.import("numpy")
+    let featureSizes = [256]
+    let kHiddenDimensions = [512]
+    let iterations = [1]
+    let trainingDatasetSize = 100
+
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)!
+    let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
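+    // `evalTracker` below has the closure shape TrackerEvaluationDataset
+    // expects: given a sequence's frames and the first ground-truth box, it
+    // returns one OrientedBoundingBox per frame.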
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + for featureSize in featureSizes { + for kHiddenDimension in kHiddenDimensions { + for j in iterations { + + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + // LOAD THE CLASSIFIER + classifier.load(weights: np.load("./classifiers/classifiers_today/large_classifier_weight_\(kHiddenDimension)_\(featureSize)_\(j).npy", allow_pickle: true)) + + let evalTracker: Tracker = {frames, start in + var tracker = makeProbabilisticTracker2( + model: classifier, + frames: frames, + targetSize: (40, 70) + ) + let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true) + let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols:70) } + return track + + } + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 1 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "classifier") + + + for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { + let folderName = "Results/classifier/classifier_\(kHiddenDimension)_\(featureSize)_\(j)_10000sampling" + print(folderName) + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + try FileManager.default.createDirectory(atPath: folderName + "/sequence0", withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig(folderName + "/sequence\(index)/classifier_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + + fig.savefig(folderName + "/classifier_\(kHiddenDimension)_\(featureSize)_\(j)subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + + + + + } + } + } + + + + } +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. 
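+/// (The `Tuple1` overload is what lets `prediction[unpack($0)]` above pull a
+/// single `Pose2` variable ID out of the tracker's per-frame tuple.)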
+fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} diff --git a/Scripts/Brando06.swift b/Scripts/Brando06.swift new file mode 100644 index 00000000..811469a4 --- /dev/null +++ b/Scripts/Brando06.swift @@ -0,0 +1,180 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + +import PenguinStructures + +// This script produces HISTOGRAMS for the output of NN Classifiers +struct Brando06: ParsableCommand { + + func run() { + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! + let batchSize = 3000 + let fgBoxes = testData.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + let bgBoxes = testData.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let np = Python.import("numpy") + let kHiddenDimensions = [512] + let featSizes = [512] + var plt = Python.import("matplotlib.pyplot") + + + for i in featSizes { + for j in kHiddenDimensions { + for num in 1...1 { + + let featureSize = i + let kHiddenDimension = j + + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + var weightsFile: String? + if let weightsFile = weightsFile { + classifier.load(weights: np.load(weightsFile, allow_pickle: true)) + } else { + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_512_512_1_doubletraining.npy", allow_pickle: true)) + } + + let outfg = classifier.classify(fgpatches) + let outbg = classifier.classify(bgpatches) + let softmaxfg = softmax(outfg) + let softmaxbg = softmax(outbg) + let folderName = "Results/brando06/classified_images" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + for i in 0...30 { + //Background + var (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 + var patch = bgpatches[i,0...,0...,0] + var fr = np.squeeze(patch.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + ax.set_title("background image: \noutput index 0: \(softmaxbg[i][0])\noutput index 1: \(softmaxbg[i][1])") + fig.savefig(folderName + "/bgpatch\(i).png", bbox_inches: "tight") + plt.close("all") + //Foreground + (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 + patch = fgpatches[i,0...,0...,0] + fr = np.squeeze(patch.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + ax.set_title("foreground image: \noutput index 0: \(softmaxfg[i][0])\noutput index 1: \(softmaxfg[i][1])") + fig.savefig(folderName + "/fgpatch\(i).png", bbox_inches: "tight") + plt.close("all") + } + + let shapefg = outfg.shape + let shapebg = outbg.shape + + var fgsum0 = 0.0 + var fgsum1 = 0.0 + var bgsum0 = 0.0 + var bgsum1 = 0.0 + var fg0_arr = [Double]() + var fg1_arr = [Double]() + var bg0_arr = [Double]() + var bg1_arr = [Double]() + for i in 0...batchSize-1 { + fgsum0 += Double(softmaxfg[i,0])! + fgsum1 += Double(softmaxfg[i,1])! + bgsum0 += Double(softmaxbg[i,0])! + bgsum1 += Double(softmaxbg[i,1])! 
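+        // Column 0 is the softmax probability of "background", column 1 of
+        // "foreground"; the sums feed the per-class means and the arrays feed
+        // the histograms plotted below.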
+ fg0_arr.append(Double(softmaxfg[i,0])!) + fg1_arr.append(Double(softmaxfg[i,1])!) + bg0_arr.append(Double(softmaxbg[i,0])!) + bg1_arr.append(Double(softmaxbg[i,1])!) + } + print("featSize", featureSize, "kHiddendimension", kHiddenDimension, "num", num, "val", fgsum1 + bgsum0 - fgsum0 - bgsum1) + + + + + print("feature size", featureSize) + print("fgsum1", fgsum1, "fgsum0", fgsum0) + print("bgsum1", bgsum1, "bgsum0", bgsum0) + + var (figs, axs) = plt.subplots(2,2).tuple2 + print("asda") + + plt.subplots_adjust(left:0.1, + bottom:0.1, + right:0.9, + top:0.9, + wspace:0.4, + hspace:0.4) + + + var ax1 = axs[1,0] + ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50) + var mean = fgsum0/Double(batchSize) + var sd = 0.0 + for elem in fg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,0] + ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = fgsum1/Double(batchSize) + sd = 0.0 + for elem in fg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[1,1] + ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum0/Double(batchSize) + sd = 0.0 + for elem in bg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,1] + + ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum1/Double(batchSize) + sd = 0.0 + for elem in bg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + figs.savefig("hist_softmax_\(kHiddenDimension)_\(featureSize)_\(num).png") + plt.close(figs) + + + + } + } + } + + + + + + + + + + + } +} diff --git a/Scripts/Brando07.swift b/Scripts/Brando07.swift new file mode 100644 index 00000000..e2e55e31 --- /dev/null +++ b/Scripts/Brando07.swift @@ -0,0 +1,172 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando07: RAE + Prob density histograms +struct Brando07: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Size of feature space") + var featureSize: Int = 256 + + @Option(help: "Pretrained weights") + var weightsFile: String? + + func run() { + let np = Python.import("numpy") + let kHiddenDimension = 512 + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + + if let weightsFile = weightsFile { + rae.load(weights: np.load(weightsFile, allow_pickle: true)) + } else { + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + } + print("s") + + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let numberOfTrainingSamples = 3000 + + let dataset = OISTBeeVideo(directory: dataDir, length: 100)! 
// calling this twice caused the Killed to happen + let batchSize = 3000 + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + print("here 1.5") + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + print("tests here2") + let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) + print("patches complete") + + + let batchPositive = rae.encode(fgpatches) + print("shape batch positive", batchPositive.shape) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = rae.encode(bgpatches) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + + var outfg0 = [Double]() + var outfg1 = [Double]() + var outbg0 = [Double]() + var outbg1 = [Double]() + print(batchPositive[0,0...].shape) + print(backgroundModel.probability(batchPositive[0,0...])) + print(foregroundModel.probability(batchPositive[0,0...])) + + for i in 0...numberOfTrainingSamples-1 { + outfg0.append(backgroundModel.probability(batchPositive[i,0...])) + outfg1.append(foregroundModel.probability(batchPositive[i,0...])) + outbg0.append(backgroundModel.probability(batchNegative[i,0...])) + outbg1.append(foregroundModel.probability(batchNegative[i,0...])) + } + + + var plt = Python.import("matplotlib.pyplot") + + + var fgsum0 = 0.0 + var fgsum1 = 0.0 + var bgsum0 = 0.0 + var bgsum1 = 0.0 + var fg0_arr = [Double]() + var fg1_arr = [Double]() + var bg0_arr = [Double]() + var bg1_arr = [Double]() + for i in 0...batchSize-1 { + fgsum0 += (outfg0[i]) + fgsum1 += (outfg1[i]) + bgsum0 += (outbg0[i]) + bgsum1 += (outbg1[i]) + fg0_arr.append((outfg0[i])) + fg1_arr.append((outfg1[i])) + bg0_arr.append((outbg0[i])) + bg1_arr.append((outbg1[i])) + } + print("featSize", featureSize, "kHiddendimension", kHiddenDimension, "val", fgsum1 + bgsum0 - fgsum0 - bgsum1) + + + + + print("feature size", featureSize) + print("fgsum1", fgsum1, "fgsum0", fgsum0) + print("bgsum1", bgsum1, "bgsum0", bgsum0) + + var (figs, axs) = plt.subplots(2,2).tuple2 + print("asda") + + plt.subplots_adjust(left:0.1, + bottom:0.1, + right:0.9, + top:0.9, + wspace:0.4, + hspace:0.4) + + + var ax1 = axs[1,0] + ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50) + var mean = fgsum0/Double(batchSize) + var sd = 0.0 + for elem in fg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,0] + ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = fgsum1/Double(batchSize) + sd = 0.0 + for elem in fg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[1,1] + ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum0/Double(batchSize) + sd = 0.0 + for elem in bg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,1] + + ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum1/Double(batchSize) + sd = 0.0 + for elem in bg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for foreground. 
\n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + figs.savefig("hist_rae_\(kHiddenDimension)_\(featureSize).png") + plt.close(figs) + + + + } + + + + + +} + + diff --git a/Scripts/Brando08.swift b/Scripts/Brando08.swift new file mode 100644 index 00000000..f4d414c1 --- /dev/null +++ b/Scripts/Brando08.swift @@ -0,0 +1,47 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + +import PenguinStructures + +// PRINT IMAGE PATCHES TO VISUALIZE +struct Brando08: ParsableCommand { + + func run() { + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let dataset = OISTBeeVideo(directory: dataDir, length: 100)! + let batchSize = 300 + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let np = Python.import("numpy") + var plt = Python.import("matplotlib.pyplot") + let mpl = Python.import("matplotlib") + + print(fgpatches.shape) + for i in batchSize-100...batchSize-1 { + let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 + let patch = bgpatches[i,0...,0...,0] + let fr = np.squeeze(patch.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + let folderName = "Results/brando08/bgpatches" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + fig.savefig("Results/brando08/bgpatches/patch\(i).png", bbox_inches: "tight") + plt.close("all") + + } + } +} diff --git a/Scripts/Brando10.swift b/Scripts/Brando10.swift new file mode 100644 index 00000000..827f8dcd --- /dev/null +++ b/Scripts/Brando10.swift @@ -0,0 +1,64 @@ + + + +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando10: Plot the samplings in progress. +struct Brando10: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + // Runs NNClassifier tracker on n number of sequences and outputs relevant images and statistics + func run() { + let np = Python.import("numpy") + let featureSizes = [512] + let kHiddenDimensions = [512] + let iterations = [1] + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! 
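+    // The .npy traces read below are assumed to come from an earlier sampling
+    // run that dumped per-frame pose and error arrays, e.g. (hypothetical
+    // producer side):
+    //   np.save(folderName + "/sampling_frame_\(i)_error.npy", np.array(errors))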
+
+    let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+    let plt = Python.import("matplotlib.pyplot")
+    for i in 0...78 {
+      let folderName = "./sampling_512_512_2000samples"
+      // The pose traces are loaded alongside the error trace, though only the
+      // error is plotted below.
+      let posex_np = np.load(folderName + "/sampling_frame_\(i)_posex.npy")
+      let posey_np = np.load(folderName + "/sampling_frame_\(i)_posey.npy")
+      let posetheta_np = np.load(folderName + "/sampling_frame_\(i)_posetheta.npy")
+      let error_np = np.load(folderName + "/sampling_frame_\(i)_error.npy")
+      let t = np.arange(0, 2000, 1)
+
+      // subplots(1, 1) returns a single Axes object, so it is used directly
+      // rather than indexed (the original `axs[0]` would fail at run time).
+      let (figs, ax) = plt.subplots(1, 1, figsize: Python.tuple([10, 4])).tuple2
+      ax.plot(t, error_np, linewidth: 1)
+      ax.set_title("error")
+      plt.subplots_adjust(left: 0.1, bottom: 0.1, right: 0.9, top: 0.9, wspace: 0.4, hspace: 0.4)
+
+      figs.savefig(folderName + "/sampling_figure_\(i).png")
+      plt.close("all")
+    }
+  }
+}
\ No newline at end of file
diff --git a/Scripts/Brando11.swift b/Scripts/Brando11.swift
new file mode 100644
index 00000000..671be2b5
--- /dev/null
+++ b/Scripts/Brando11.swift
@@ -0,0 +1,42 @@
+import ArgumentParser
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import PenguinStructures
+
+/// Brando11: compute the mean displacement between consecutive frames
+struct Brando11: ParsableCommand {
+  @Option(help: "Run for number of frames")
+  var trackLength: Int = 80
+
+  func run() {
+    let np = Python.import("numpy")
+    let plt = Python.import("matplotlib.pyplot")
+    let trainingDatasetSize = 100
+
+    // LOAD THE TEST VIDEO AND ITS GROUND-TRUTH TRACKS
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let data = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+    var dX = [Double]()
+    var dY = [Double]()
+    var dTheta = [Double]()
+    for track in data.tracks {
+      var prevObb: OrientedBoundingBox? = nil
+      for obb in track.boxes {
+        if let prev = prevObb {
+          dX.append(obb.center.t.x - prev.center.t.x)
+          dY.append(obb.center.t.y - prev.center.t.y)
+          dTheta.append(obb.center.rot.theta - prev.center.rot.theta)
+        }
+        // Advance the previous box every frame so each delta is between
+        // consecutive frames (the original only set it once per track).
+        prevObb = obb
+      }
+    }
+    // Plot histogram.
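+    // A minimal sketch of the plotting step left as a TODO above; the bin
+    // count, figure layout, and output path are assumptions, and the printed
+    // means give the "mean displacement" this script is named for:
+    let (figs, axs) = plt.subplots(1, 3, figsize: Python.tuple([12, 4])).tuple2
+    axs[0].hist(dX, bins: 50)
+    axs[0].set_title("dx (pixels)")
+    axs[1].hist(dY, bins: 50)
+    axs[1].set_title("dy (pixels)")
+    axs[2].hist(dTheta, bins: 50)
+    axs[2].set_title("dtheta (radians)")
+    figs.savefig("Results/brando11_displacement_histograms.png", bbox_inches: "tight")
+    plt.close("all")
+    let n = Double(max(dX.count, 1))
+    print("mean dx = \(dX.reduce(0, +) / n), mean dy = \(dY.reduce(0, +) / n), mean dtheta = \(dTheta.reduce(0, +) / n)")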
+ + } +} diff --git a/Scripts/Brando12.swift b/Scripts/Brando12.swift new file mode 100644 index 00000000..fad930f2 --- /dev/null +++ b/Scripts/Brando12.swift @@ -0,0 +1,311 @@ +import ArgumentParser +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import PenguinStructures + +/// Brando12: OPTIMIZATION CONVERGENCE VISUALIZATION [red & green dots] +struct Brando12: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Classifier or rae") + var useClassifier: Bool = true + + func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) { + // CREATE A PLACEHOLDER FOR POSE + var v = VariableAssignments() + let poseId = v.store(p) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - p.t.x + let dy = v[poseId].t.y - p.t.y + let dtheta = v[poseId].rot.theta - p.rot.theta + let startpose = v[poseId] + let fg = FactorGraph() + + return (dx, dy, dtheta, startpose, v, poseId, fg) + } + + func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) { + var conv = true + var errors = [Double]() + var xs = [Double]() + var ys = [Double]() + var thetas = [Double]() + return (conv, errors, xs, ys, thetas) + } + + + + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let trainingDatasetSize = 100 + + // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let frames = testData.frames + let firstTrack = testData.tracks[0] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + + + //OPTIMIZER GRADIENT DESCENT + let lr = 1e-7 + var optimizer = GradientDescent(learningRate: lr) + let it_limit = 200 + + + //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION + let str: String + if useClassifier{ + str = "NNC" + } else { + str = "RAE" + } + let folderName = "Results/GD_optimization_\(str)_lr_\(lr)__3_09_2021_final_images_4subplots" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + + + + + //CREATE A FIG + print("hello1") + let (fig, axs) = plt.subplots(2,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + for i in 0...1 { + for j in 0...1 { + axs[i,j].imshow(fr / 255.0, cmap: "gray") + let firstGroundTruth = firstObb.center + axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50) + axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50) + axs[i,j].get_xaxis().set_visible(false) + axs[i,j].get_yaxis().set_visible(false) + } + } + axs[0,0].set_title("fabs(theta) < 0.1", fontsize:8) + axs[0,1].set_title("fabs(theta) < 0.2", fontsize:8) + axs[1,0].set_title("fabs(theta) < 0.3", fontsize:8) + axs[1,1].set_title("fabs(theta) >= 0.3", fontsize:8) + + print("hello") + let xy_thresh = 20.0 //pixels + let theta_thresh = 0.5 //radians // consider doing overlap. 
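+    // A hypothetical helper, equivalent to the inline bounds checks performed
+    // after each optimization run below; "converged" means landing within
+    // xy_thresh pixels and theta_thresh radians of the label:
+    func withinThreshold(_ pose: Pose2, of label: Pose2) -> Bool {
+      return abs(pose.t.x - label.t.x) <= xy_thresh
+        && abs(pose.t.y - label.t.y) <= xy_thresh
+        && abs(pose.rot.theta - label.rot.theta) <= theta_thresh
+    }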
+ + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + + + if useClassifier { + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true)) + + for j in 0...200 { + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // CREATE THE FACTOR AND FACTOR GRAPH + let factorNNC = ProbablisticTrackingFactor2(poseId, + measurement: firstFrame, + classifier: classifier, + patchSize: (40, 70), + appearanceModelSize: (40, 70) + ) + fg.store(factorNNC) + + + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization") + for i in 0.. firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } + + } else { + + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + + final_err = factorNNC.errorVector(v[poseId]).x + label_err = factorNNC.errorVector(firstObb.center).x + start_err = factorNNC.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + + + + } + + + + + + + } else { + // LOAD RAE AND TRAIN BG AND FG MODELS + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + let (fg, bg, _) = getTrainingBatches( + dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000, + fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true + ) + let batchPositive = rae.encode(fg) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = rae.encode(bg) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + + for j in 0...200 { + + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // CREATE THE FACTOR AND FACTOR GRAPH + let factorRAE = ProbablisticTrackingFactor(poseId, + measurement: firstFrame, + encoder: rae, + patchSize: (40, 70), + appearanceModelSize: (40, 70), + foregroundModel: foregroundModel, + backgroundModel: backgroundModel, + maxPossibleNegativity: 1e7 + ) + fg.store(factorRAE) + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization") + for i in 0.. 
firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } + + } else { + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + final_err = factorRAE.errorVector(v[poseId]).x + label_err = factorRAE.errorVector(firstObb.center).x + start_err = factorRAE.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + } + } + } +} diff --git a/Scripts/Brando13.swift b/Scripts/Brando13.swift new file mode 100644 index 00000000..8678c7cb --- /dev/null +++ b/Scripts/Brando13.swift @@ -0,0 +1,421 @@ +import ArgumentParser +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import PenguinStructures + +/// Brando13: OPTIMIZATION CONVERGENCE VISUALIZATION with LM +struct Brando13: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Classifier or rae") + var useClassifier: Bool = false + + func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) { + // CREATE A PLACEHOLDER FOR POSE + var v = VariableAssignments() + let poseId = v.store(p) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - p.t.x + let dy = v[poseId].t.y - p.t.y + let dtheta = v[poseId].rot.theta - p.rot.theta + let startpose = v[poseId] + let fg = FactorGraph() + + return (dx, dy, dtheta, startpose, v, poseId, fg) + } + + func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) { + var conv = true + var errors = [Double]() + var xs = [Double]() + var ys = [Double]() + var thetas = [Double]() + return (conv, errors, xs, ys, thetas) + } + + + + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let trainingDatasetSize = 100 + + // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let frames = testData.frames + let firstTrack = testData.tracks[0] + // let firstTrack = testData.tracks[5] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + // let firstObb = firstTrack.boxes[5] + + + //OPTIMIZER GRADIENT DESCENT + // let lr = 1e-7 + // var optimizer = GradientDescent(learningRate: lr) + let it_limit = 200 + /// The set of steps taken. + var step: Int = 0 + + /// Desired precision, TODO(fan): make this actually work + var precision: Double = 1e-10 + + /// The precision of the CGLS solver. 
+ var cgls_precision: Double = 1e-10 + + /// Maximum number of L-M iterations + var max_iteration: Int = 50 + + /// Maximum number of G-N iterations + var max_inner_iteration: Int = 400 + + /// Maximam Lambda + var max_lambda: Double = 1e32 + + /// Minimum Lambda + var min_lambda: Double = 1e-16 + + /// Initial Lambda + // var initial_lambda: Double = 1e-4 + var initial_lambda: Double = 1e7 + + /// Lambda Factor + var lambda_factor: Double = 2 + + + //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION + let str: String + if useClassifier{ + str = "NNC" + } else { + str = "RAE" + } + let folderName = "Results/LM_optimization_\(str)__17_09_2021_final_images_4subplots" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + + + + + //CREATE A FIG + print("hello1") + let (fig, axs) = plt.subplots(2,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + for i in 0...1 { + for j in 0...1 { + axs[i,j].imshow(fr / 255.0, cmap: "gray") + let firstGroundTruth = firstObb.center + // axs[i,j].plot(firstObb.corners.map{$0.x} + [firstObb.corners.first!.x], firstObb.corners.map{$0.y} + [firstObb.corners.first!.y], "b-") + axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50) + axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50) + axs[i,j].get_xaxis().set_visible(false) + axs[i,j].get_yaxis().set_visible(false) + } + } + axs[0,0].set_title("fabs(theta) < 5deg", fontsize:8) + axs[0,1].set_title("fabs(theta) < 10deg", fontsize:8) + axs[1,0].set_title("fabs(theta) < 25deg", fontsize:8) + axs[1,1].set_title("fabs(theta) >= 25deg", fontsize:8) + + print("hello") + let xy_thresh = 20.0 //pixels + let theta_thresh = 0.5 //radians // consider doing overlap. + + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + + + if useClassifier { + print("using classifier") + // var classifier = NNClassifier( + // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + // ) + // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true)) + + // for j in 0...200 { + // // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + // var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // // CREATE THE FACTOR AND FACTOR GRAPH + // let factorNNC = ProbablisticTrackingFactor2(poseId, + // measurement: firstFrame, + // classifier: classifier, + // patchSize: (40, 70), + // appearanceModelSize: (40, 70) + // ) + // fg.store(factorNNC) + // print(firstObb.center) + + + // // PERFORM GRADIENT DESCENT + // var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + // print("starting optimization") + // for i in 0.. 
firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + // let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + // let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + // if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + // // plot a green dot + // // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") + // // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + // if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + // axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + // axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + // axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } else { + // axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } + + // } else { + // // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") + // // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + // if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + // axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + // axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + // axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } else { + // axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } + // } + // let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + // var final_err: Double + // var label_err: Double + // var start_err: Double + + + // final_err = factorNNC.errorVector(v[poseId]).x + // label_err = factorNNC.errorVector(firstObb.center).x + // start_err = factorNNC.errorVector(startpose).x + + // axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + // + "\n label err = \(label_err).x)" + // + "\n start err = \(start_err)" + // + "\n learning rate = \(lr)" + // + "\n converged = \(conv)") + // figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + // // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) + // // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") + // plt.close("all") + // fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + + + + // } + + + + + + + } else { + // LOAD RAE AND TRAIN BG AND FG MODELS + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + let (fg, bg, _) = getTrainingBatches( + dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000, + fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true + ) + let batchPositive = rae.encode(fg) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = rae.encode(bg) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + + for j in 0...200 { + + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // CREATE THE FACTOR AND FACTOR GRAPH + let factorRAE = ProbablisticTrackingFactor(poseId, + measurement: firstFrame, + encoder: rae, + patchSize: (40, 70), + appearanceModelSize: (40, 70), + foregroundModel: foregroundModel, + backgroundModel: backgroundModel, + maxPossibleNegativity: 1e7 + ) + fg.store(factorRAE) + print(firstObb.center) + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization LM") + var old_error = fg.linearizableError(at: v) + + var lambda: Double = initial_lambda + var inner_iter_step = 0 + var inner_success = false + var all_done = false + var i = 0 + precision = 1e-10 + max_iteration = 50 + step = 0 + for i in 0.. 
.ulpOfOne && model_fidelity > 0.01 { + old_error = this_error + + // Success, decrease lambda + if lambda > min_lambda { + lambda = lambda / lambda_factor + } + + inner_success = true + } else { + + // increase lambda and retry + v = oldval + if lambda > max_lambda { + print("OOOOOOOHHHHHH SHIT!") + break + } + lambda = lambda * lambda_factor + } + + if model_fidelity > 0.5 && delta_error < precision || this_error < precision { + inner_success = true + all_done = true + break + } + + inner_iter_step += 1 + if inner_success { + break + } + } + + step += 1 + + if all_done { + break + } + if i == max_iteration-1 { + conv = false + } + } + + + // PLOT THE FINAL OPTIMIZATION RESULT + let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + // plot a green dot + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } + + } else { + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + final_err = factorRAE.errorVector(v[poseId]).x + label_err = factorRAE.errorVector(firstObb.center).x + start_err = factorRAE.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) + // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + } + } + } +} \ No newline at end of file diff --git a/Scripts/Brando14.swift b/Scripts/Brando14.swift new file mode 100644 index 00000000..70d64f2b --- /dev/null +++ b/Scripts/Brando14.swift @@ -0,0 +1,199 @@ +import ArgumentParser +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import PenguinStructures + +/// Brando14: ERRORVALUE over entire image +struct Brando14: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Classifier or rae") + var useClassifier: Bool = true + + + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let trainingDatasetSize = 100 + + // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let frames = testData.frames + let firstTrack = testData.tracks[0] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + + let range = 100.0 + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 512 + let kHiddenDimension = 512 + + + //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION + let str: String + if useClassifier{ + str = "NNC" + } else { + str = "RAE" + } + let lr = 1e-6 + let folderName = "Results/ErrorValueVizualized_\(str)_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + + + let firstGroundTruth = firstObb.center + print("oBB coordinates", firstGroundTruth.t.x, firstGroundTruth.t.y) + + //CREATE A FIG + print("hello1") + let (fig, axs) = plt.subplots(1,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + axs[0].imshow(fr / 255.0, cmap: "gray") + + + axs[0].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2) + axs[0].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2) + axs[1].set_xlim(0, range) + axs[1].set_ylim(0, range) + + let x = firstGroundTruth.t.x + let y = firstGroundTruth.t.y + + + + + + var values = Tensor(zeros: [Int(range), Int(range)]) + print("printing tensor",values) + + if useClassifier { + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_512_512_1_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy", allow_pickle: true)) + + print("done loading") + for i in 0...Int(range)-1 { + for j in 0...Int(range)-1 { + let t = Vector2(x-range/2+Double(i), 
+          let p = Pose2(firstGroundTruth.rot, t)
+          var v = VariableAssignments()
+          let poseId = v.store(p)
+          var fg = FactorGraph()
+          // CREATE THE FACTOR AND FACTOR GRAPH
+          let factorNNC = ProbablisticTrackingFactor2(poseId,
+            measurement: firstFrame,
+            classifier: classifier,
+            patchSize: (40, 70),
+            appearanceModelSize: (40, 70)
+          )
+          fg.store(factorNNC)
+          values[i,j] = Tensor(factorNNC.errorVector(v[poseId]).x)
+        }
+        print("row", i)
+      }
+      // Shift the error values to be non-negative, then rescale to [0, 255] for display.
+      let min_val = values.min()
+      if Double(min_val)! < 0 {
+        values = values - min_val
+      }
+      values = values / values.max() * 255
+      print(values[0..., 0])
+      print(values.shape)
+      axs[1].imshow(values.makeNumpyArray())
+      fig.savefig(folderName + "/vizual_NNC.png", bbox_inches: "tight")
+    } else {
+      print("RAE")
+      // LOAD RAE AND TRAIN BG AND FG MODELS
+      var rae = DenseRAE(
+        imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
+        hiddenDimension: kHiddenDimension, latentDimension: featureSize
+      )
+      rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true))
+      let (fg, bg, _) = getTrainingBatches(
+        dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000,
+        fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true
+      )
+      let batchPositive = rae.encode(fg)
+      let foregroundModel = MultivariateGaussian(from: batchPositive, regularizer: 1e-3)
+      let batchNegative = rae.encode(bg)
+      let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3)
+      for i in 0..<Int(range) {
+        for j in 0..<Int(range) {
+          let t = Vector2(x - range/2 + Double(i), y - range/2 + Double(j))
+          let p = Pose2(firstGroundTruth.rot, t)
+          var v = VariableAssignments()
+          let poseId = v.store(p)
+          var fg = FactorGraph()
+          // CREATE THE FACTOR AND FACTOR GRAPH
+          let factorRAE = ProbablisticTrackingFactor(poseId,
+            measurement: firstFrame,
+            encoder: rae,
+            patchSize: (40, 70),
+            appearanceModelSize: (40, 70),
+            foregroundModel: foregroundModel,
+            backgroundModel: backgroundModel,
+            maxPossibleNegativity: 1e7
+          )
+          fg.store(factorRAE)
+          values[i,j] = Tensor(factorRAE.errorVector(v[poseId]).x)
+        }
+        print("row", i)
+      }
+      print(values[0..., 0])
+      let min_val = values.min()
+      if Double(min_val)! < 0 {
+        values = values - min_val
+      }
+      values = values / values.max() * 255
+      print(values[0..., 0])
+      print(values.shape)
+      axs[1].imshow(values.makeNumpyArray())
+
+      fig.savefig(folderName + "/vizual_RAE.png", bbox_inches: "tight")
+    }
+  }
+}
\ No newline at end of file
diff --git a/Scripts/Brando15.swift b/Scripts/Brando15.swift
new file mode 100644
index 00000000..59b9ccf9
--- /dev/null
+++ b/Scripts/Brando15.swift
@@ -0,0 +1,91 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+/// Brando15: SAVE PATCHES FOR LATER USE
+struct Brando15: ParsableCommand {
+  typealias LikelihoodModel = TrackingLikelihoodModel
+
+  @Flag(help: "Training mode")
+  var training: Bool = false
+
+  let num_boxes: Int = 10000
+
+  func getTrainingDataBG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor<Double>, Tensor<Double>) {
+    print("bg")
+    let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+      $0.frame!.patch(at: $0.obb)
+    }
+    print("bg2")
+    let labels = Tensor<Double>(zeros: [num_boxes])
+    print("labels done bg")
+    let patches = Tensor(stacking: bgBoxes)
+    print("patches done bg")
+    return (labels, patches)
+  }
+
+  func getTrainingDataFG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor<Double>, Tensor<Double>) {
+    print("fg")
+    let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+      $0.frame!.patch(at: $0.obb)
+    }
+    print("fg2")
+    let labels = Tensor<Double>(ones: [num_boxes])
+    print("labels done fg")
+    let patches = Tensor(stacking: fgBoxes)
+    print("patches done fg")
+    return (labels, patches)
+  }
+
+  // Extracts background and foreground patches from the training video and saves
+  // them as .npy files so that classifier training runs don't have to recompute them.
+  func run() {
+    let folderName = "classifiers/classifiers_today"
+    if !FileManager.default.fileExists(atPath: folderName) {
+      do {
+        try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+      } catch {
+        print(error.localizedDescription)
+      }
+    } else {
+      print("folder exists")
+    }
+
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
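+    // Editor's sketch (not in the original script): the .npy files written below can
+    // be reloaded later roughly like this. Labels are never written to disk, so they
+    // must be reconstructed from the layout: the first num_boxes patches are
+    // background (label 0), the remaining num_boxes are foreground (label 1).
+    //   let np = Python.import("numpy")
+    //   let patches = Tensor<Double>(numpy: np.load("Patches_bg_fg_10000.npy"))!
+    //   let labels = Tensor(concatenate(Tensor<Double>(zeros: [10000]),
+    //                                   Tensor<Double>(ones: [10000])))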
+    let (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset)
+    let np = Python.import("numpy")
+    np.save("Patches_bg_\(num_boxes).npy", patches_bg.makeNumpyArray())
+    let (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset)
+
+    let patches = Tensor(stacking: patches_bg.unstacked() + patches_fg.unstacked())
+    let labels = Tensor(concatenate(labels_bg, labels_fg))
+    print("shape of patches", patches.shape)
+    print("shape of labels", labels.shape)
+    // Note: only the patches are saved; the labels are implicit in the ordering
+    // (background patches first, then foreground patches).
+    np.save("Patches_bg_fg_\(num_boxes).npy", patches.makeNumpyArray())
+  }
+}
diff --git a/Scripts/Brando16.swift b/Scripts/Brando16.swift
new file mode 100644
index 00000000..43343228
--- /dev/null
+++ b/Scripts/Brando16.swift
@@ -0,0 +1,126 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando16: PPCA tracker tests over several feature dimensions
+struct Brando16: ParsableCommand {
+  typealias LikelihoodModel = TrackingLikelihoodModel
+
+  @Option(help: "Run for number of frames")
+  var trackLength: Int = 80
+
+  func getTrainingData(
+    from dataset: OISTBeeVideo,
+    numberForeground: Int = 4500
+  ) -> [LikelihoodModel.Datum] {
+    let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
+      (frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb)
+    }
+    return fgBoxes
+  }
+
+  // Runs a PPCA tracker on a number of sequences and outputs relevant images and statistics.
+  // Make sure you have `Results/ppca_<dim>/sequence<n>` and `Results/ppca_<dim>/EAO`
+  // folders before running.
+  func run() {
+    let np = Python.import("numpy")
+    let pickle = Python.import("pickle")
+
+    let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+
+    // Feature dimensions to sweep over (used to be a single size of 512).
+    let hiddenDimensions = [16, 64, 256]
+    for dim in hiddenDimensions {
+      let dataDir = URL(fileURLWithPath: "./OIST_Data")
+
+      let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
+
+      let trainingData = Tensor(stacking: getTrainingData(from: trainingDataset).map { $0.frame!.patch(at: $0.obb) })
+      let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: trackLength)!
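+      // Editor's note (sketch): the mean and standard deviation hardcoded below are
+      // dataset-wide OIST grayscale statistics. They could equivalently be recomputed
+      // from the training patches at the cost of an extra pass over the data:
+      //   var statistics = FrameStatistics(Tensor(0.0))
+      //   statistics.mean = trainingData.mean()
+      //   statistics.standardDeviation = trainingData.standardDeviation()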
+      var statistics = FrameStatistics(Tensor(0.0))
+      statistics.mean = Tensor(62.26806976644069)
+      statistics.standardDeviation = Tensor(37.44683834503672)
+      let trainingBatch = trainingDataset.makeBatch(statistics: statistics, appearanceModelSize: (imageHeight, imageWidth), batchSize: 4500)
+      let pcaEncoder = PCAEncoder(from: trainingBatch, given: dim)
+
+      let trackerEvaluation = TrackerEvaluationDataset(testData)
+      let evalTracker: Tracker = { frames, start in
+        var tracker = trainProbabilisticTracker(
+          trainingData: trainingDataset,
+          encoder: pcaEncoder,
+          frames: frames,
+          boundingBoxSize: (40, 70),
+          withFeatureSize: dim,
+          fgRandomFrameCount: 100,
+          bgRandomFrameCount: 100
+        )
+        let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true)
+        let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols: 70) }
+
+        return track
+      }
+      let plt = Python.import("matplotlib.pyplot")
+      let sequenceCount = 19
+      let results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01")
+
+      for (index, value) in results.sequences.prefix(sequenceCount).enumerated() {
+        var i: Int = 0
+        zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).forEach {
+          let (fig, _) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
+          fig.savefig("Results/ppca_\(dim)/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight")
+          plt.close("all")
+          i += 1
+        }
+
+        let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
+        fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).")
+
+        value.subsequences.forEach {
+          let encoder = JSONEncoder()
+          let data = try! encoder.encode($0.prediction)
+          FileManager.default.createFile(atPath: "Results/ppca_\(dim)/prediction_ppca_\(dim)_sequence_\(index).json", contents: data, attributes: nil)
+          plotPoseDifference(
+            track: $0.prediction.map { $0.center }, withGroundTruth: $0.groundTruth.map { $0.center }, on: axes[0]
+          )
+        }
+        plotOverlap(
+          metrics: value.subsequences.first!.metrics, on: axes[1]
+        )
+        fig.savefig("Results/ppca_\(dim)/andrew01_subsequence\(index).png", bbox_inches: "tight")
+        print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)")
+      }
+
+      print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)")
+      let f = Python.open("Results/ppca_\(dim)/EAO/rp_\(dim).data", "wb")
+      pickle.dump(results.expectedAverageOverlap.curve, f)
+      f.close()
+    }
+  }
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> A {
+  return t.head
+}
\ No newline at end of file
diff --git a/Scripts/Fan03.swift b/Scripts/Fan03.swift
index d153b95b..ce2c1aa8 100644
--- a/Scripts/Fan03.swift
+++ b/Scripts/Fan03.swift
@@ -27,7 +27,7 @@ struct Fan03: ParsableCommand {
     let rp = RandomProjection(fromShape: TensorShape([imageHeight, imageWidth, imageChannels]), toFeatureSize: featureSize)
 
-    let (fig, _, _) = runProbabilisticTracker(
+    let (fig, track, gt) = runProbabilisticTracker(
       directory: dataDir,
       encoder: rp,
       onTrack: trackId, forFrames: trackLength, withSampling: true,
@@ -37,5 +37,17 @@ struct Fan03: ParsableCommand {
 
     /// Actual track v.s. ground truth track
     fig.savefig("Results/fan03/fan03_track\(trackId)_\(featureSize).pdf", bbox_inches: "tight")
+
+    let json = JSONEncoder()
+    json.outputFormatting = .prettyPrinted
+    let track_data = try! json.encode(track)
+    try! track_data.write(to: URL(fileURLWithPath: "Results/fan04/fan04_track_\(trackId)_\(featureSize).json"))
+
+    let gt_data = try! json.encode(gt)
+    try! gt_data.write(to: URL(fileURLWithPath: "Results/fan04/fan04_gt_\(trackId)_\(featureSize).json"))
+
   }
 }
diff --git a/Scripts/Fan05.swift b/Scripts/Fan05.swift
index ff43e4c2..1082d207 100644
--- a/Scripts/Fan05.swift
+++ b/Scripts/Fan05.swift
@@ -31,7 +31,6 @@ struct Fan05: ParsableCommand {
     let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
       (frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb)
     }
-
     return fgBoxes + bgBoxes
   }
 
diff --git a/Scripts/Fan12.swift b/Scripts/Fan12.swift
index cf4649b7..ecaaca94 100644
--- a/Scripts/Fan12.swift
+++ b/Scripts/Fan12.swift
@@ -31,7 +31,7 @@ struct Fan12: ParsableCommand {
   // Just runs an RP tracker and saves image to file
   // Make sure you have a folder `Results/fan12` before running
   func run() {
-    let kHiddenDimension = 100
+    let kHiddenDimension = 512
     let dataDir = URL(fileURLWithPath: "./OIST_Data")
 
     let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
diff --git a/Scripts/main.swift b/Scripts/main.swift
index 9b96f9c1..c8337fea 100644
--- a/Scripts/main.swift
+++ b/Scripts/main.swift
@@ -17,7 +17,11 @@ import PenguinParallelWithFoundation
 
 struct Scripts: ParsableCommand {
   static var configuration = CommandConfiguration(
-    subcommands: [Andrew01.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self,
+    subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self,
+      Brando05.self, Brando06.self, Brando07.self, Brando08.self,
+      Brando10.self, Brando11.self, Brando12.self, Brando13.self, Brando14.self, Brando15.self, Brando16.self, Andrew01.self,
+      Andrew05.self, Andrew06.self, Andrew07.self, Andrew08.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self,
+      Fan10.self, Fan12.self, Fan13.self, Fan14.self,
       Frank01.self, Frank02.self, Frank03.self, Frank04.self])
 }
 
diff --git a/Sources/BeeDataset/Visualization.swift b/Sources/BeeDataset/Visualization.swift
index 6558c08b..78975fb9 100644
--- a/Sources/BeeDataset/Visualization.swift
+++ b/Sources/BeeDataset/Visualization.swift
@@ -1,76 +1,76 @@
-// Copyright 2020 The SwiftFusion Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// // Copyright 2020 The SwiftFusion Authors. All Rights Reserved. +// // +// // Licensed under the Apache License, Version 2.0 (the "License"); +// // you may not use this file except in compliance with the License. +// // You may obtain a copy of the License at +// // +// // http://www.apache.org/licenses/LICENSE-2.0 +// // +// // Unless required by applicable law or agreed to in writing, software +// // distributed under the License is distributed on an "AS IS" BASIS, +// // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// // See the License for the specific language governing permissions and +// // limitations under the License. -import SwiftFusion -import TensorFlow -import Plotly -import ModelSupport -import Foundation +// import SwiftFusion +// import TensorFlow +// // import Plotly +// import ModelSupport +// import Foundation -/// Creates a Plotly figure that displays `frame`, with optional `boxes` overlaid on -/// them. -public func plot( - _ frame: Tensor, boxes: [(name: String, OrientedBoundingBox)] = [], - margin: Double = 30, scale: Double = 1 -) -> Plotly.Figure { - let rows = Double(frame.shape[0]) - let cols = Double(frame.shape[1]) +// /// Creates a Plotly figure that displays `frame`, with optional `boxes` overlaid on +// /// them. +// public func plot( +// _ frame: Tensor, boxes: [(name: String, OrientedBoundingBox)] = [], +// margin: Double = 30, scale: Double = 1 +// ) -> Plotly.Figure { +// let rows = Double(frame.shape[0]) +// let cols = Double(frame.shape[1]) - // Axis settings: - // - no grid - // - range is the image size - // - scale is anchored, to preserve image aspect ratio - // - y axis reversed so that everything is in "(u, v)" coordinates - let xAx = Layout.XAxis(range: [0, InfoArray(cols)], showGrid: false) - let yAx = Layout.YAxis( - autoRange: .reversed, range: [0, InfoArray(rows)], scaleAnchor: .xAxis(xAx), showGrid: false) +// // Axis settings: +// // - no grid +// // - range is the image size +// // - scale is anchored, to preserve image aspect ratio +// // - y axis reversed so that everything is in "(u, v)" coordinates +// let xAx = Layout.XAxis(range: [0, InfoArray(cols)], showGrid: false) +// let yAx = Layout.YAxis( +// autoRange: .reversed, range: [0, InfoArray(rows)], scaleAnchor: .xAxis(xAx), showGrid: false) - let tmpPath = URL(fileURLWithPath: "tmpForPlotlyDisplay.png") - ModelSupport.Image(Tensor(frame)).save(to: tmpPath) - let imageData = try! "data:image/png;base64," + Data(contentsOf: tmpPath).base64EncodedString() +// let tmpPath = URL(fileURLWithPath: "tmpForPlotlyDisplay.png") +// ModelSupport.Image(Tensor(frame)).save(to: tmpPath) +// let imageData = try! "data:image/png;base64," + Data(contentsOf: tmpPath).base64EncodedString() - return Figure( - data: [ - // Dummy data because Plotly is confused when there is no data. 
- Scatter( - x: [0, cols], y: [0, rows], - mode: .markers, marker: Shared.GradientMarker(opacity: 0), - xAxis: xAx, yAxis: yAx - ) - ] + boxes.map { box in - Scatter( - name: box.name, - x: box.1.corners.map { $0.x }, - y: box.1.corners.map { $0.y }, - xAxis: xAx, - yAxis: yAx - ) - }, - layout: Layout( - width: cols * scale + 2 * margin, - height: rows * scale + 2 * margin, - margin: Layout.Margin(l: margin, r: margin, t: margin, b: margin), - images: [ - Layout.Image( - visible: true, - source: imageData, - layer: .below, - xSize: cols, ySize: rows, - sizing: .stretch, - x: 0, y: 0, xReference: .xAxis(xAx), yReference: .yAxis(yAx) - ) - ] - ) - ) -} \ No newline at end of file +// return Figure( +// data: [ +// // Dummy data because Plotly is confused when there is no data. +// Scatter( +// x: [0, cols], y: [0, rows], +// mode: .markers, marker: Shared.GradientMarker(opacity: 0), +// xAxis: xAx, yAxis: yAx +// ) +// ] + boxes.map { box in +// Scatter( +// name: box.name, +// x: box.1.corners.map { $0.x }, +// y: box.1.corners.map { $0.y }, +// xAxis: xAx, +// yAxis: yAx +// ) +// }, +// layout: Layout( +// width: cols * scale + 2 * margin, +// height: rows * scale + 2 * margin, +// margin: Layout.Margin(l: margin, r: margin, t: margin, b: margin), +// images: [ +// Layout.Image( +// visible: true, +// source: imageData, +// layer: .below, +// xSize: cols, ySize: rows, +// sizing: .stretch, +// x: 0, y: 0, xReference: .xAxis(xAx), yReference: .yAxis(yAx) +// ) +// ] +// ) +// ) +// } \ No newline at end of file diff --git a/Sources/BeeTracking/AppearanceRAE+Serialization.swift b/Sources/BeeTracking/AppearanceRAE+Serialization.swift index 6c0d5f05..dc3116d6 100644 --- a/Sources/BeeTracking/AppearanceRAE+Serialization.swift +++ b/Sources/BeeTracking/AppearanceRAE+Serialization.swift @@ -71,4 +71,26 @@ extension DenseRAE { self.decoder_conv1.numpyWeights ].reduce([], +) } + } + + +extension NNClassifier { + /// Loads model weights from the numpy arrays in `weights`. + public mutating func load(weights: PythonObject) { + self.encoder_conv1.load(weights: weights[0..<2]) + self.encoder1.load(weights: weights[2..<4]) + self.encoder2.load(weights: weights[4..<6]) + self.encoder3.load(weights: weights[6..<8]) + } + + /// The model weights as numpy arrays. + public var numpyWeights: PythonObject { + [ + self.encoder_conv1.numpyWeights, + self.encoder1.numpyWeights, + self.encoder2.numpyWeights, + self.encoder3.numpyWeights + ].reduce([], +) + } +} \ No newline at end of file diff --git a/Sources/BeeTracking/BigTransfer.swift b/Sources/BeeTracking/BigTransfer.swift new file mode 100644 index 00000000..d1f57b56 --- /dev/null +++ b/Sources/BeeTracking/BigTransfer.swift @@ -0,0 +1,482 @@ +// Copyright 2020 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Original source: +// "Big Transfer (BiT): General Visual Representation Learning" +// Alexander Kolesnikov, Lucas Beyer, Xiaohua Zhai, Joan Puigcerver, Jessica Yung, Sylvain Gelly, Neil Houlsby. 
+// https://arxiv.org/abs/1912.11370
+
+import Foundation
+import TensorFlow
+import PythonKit
+import BeeDataset
+import SwiftFusion
+
+let subprocess = Python.import("subprocess")
+let np = Python.import("numpy")
+
+/// Convenience wrapper used to load all of the trained layers from the .npz file downloaded from the
+/// BigTransfer weights repository.
+struct BigTransferNamedLayer {
+  let name: String
+  var layer: Tensor<Float>
+}
+
+/// Returns the BiT-HyperRule training resolution for the given original resolution.
+func getResolution(originalResolution: (Int, Int)) -> (Int, Int) {
+  let area = originalResolution.0 * originalResolution.1
+  return area < 96*96 ? (160, 128) : (512, 480)
+}
+
+/// Get the necessary padding to maintain the network size specified in the BigTransfer architecture
+///
+/// - Parameters:
+///   - kernelSize: size n which represents the height and width of the nxn kernel
+/// - Returns: the left / top padding and the right / bottom padding necessary to maintain correct output sizes
+///   after convolution
+func paddingFromKernelSize(kernelSize: Int) -> [(before: Int, after: Int)] {
+  let padTotal = kernelSize - 1
+  let padBeginning = Int(padTotal / 2)
+  let padEnd = padTotal - padBeginning
+  let padding = [
+    (before: 0, after: 0),
+    (before: padBeginning, after: padEnd),
+    (before: padBeginning, after: padEnd),
+    (before: 0, after: 0)]
+  return padding
+}
+
+/// Get all of the pre-trained layers from the .npz file into a Swift array to load into the BigTransfer model
+///
+/// - Parameters:
+///   - modelName: model name that represents the weights to load from the BigTransfer weights repository
+///     ("BiT-M-R50x1" for example)
+/// - Returns: an array of layers and their associated name in the .npz file downloaded from the weights repository
+func getPretrainedWeightsDict(modelName: String) -> [BigTransferNamedLayer] {
+  let validTypes = ["BiT-S", "BiT-M"]
+  let validSizes = [(50, 1), (50, 3), (101, 1), (101, 3), (152, 4)]
+  let bitURL = "https://storage.googleapis.com/bit_models/"
+  var knownModels = [String: String]()
+
+  for types in validTypes {
+    for sizes in validSizes {
+      let modelString = types + "-R" + String(sizes.0) + "x" + String(sizes.1)
+      knownModels[modelString] = bitURL + modelString + ".npz"
+    }
+  }
+
+  if let modelPath = knownModels[modelName] {
+    subprocess.call("wget " + modelPath + " .", shell: true)
+  }
+
+  let weights = np.load("./" + modelName + ".npz")
+
+  var weightsArray = [BigTransferNamedLayer]()
+  for param in weights {
+    weightsArray.append(BigTransferNamedLayer(name: String(param)!, layer: Tensor<Float>(numpy: weights[param])!))
+  }
+  return weightsArray
+}
+
+/// A 2D Convolution layer that standardizes the weights before the forward pass.
This has been implemented in +/// accordance with the implementation in https://github.com/google-research/big_transfer/blob/49afe42338b62af9fbe18f0258197a33ee578a6b/bit_pytorch/models.py#L25 +public struct StandardizedConv2D: Layer { + public var conv: Conv2D + + public init( + filterShape: (Int, Int, Int, Int), + strides: (Int, Int) = (1, 1), + padding: Padding = .valid, + useBias: Bool = true + ) + { + self.conv = Conv2D( + filterShape: filterShape, + strides: strides, + padding: padding, + useBias: useBias) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + let axes: Array = [0, 1, 2] + var standardizedConv = conv + standardizedConv.filter = (standardizedConv.filter - standardizedConv.filter.mean(squeezingAxes: axes)) / sqrt((standardizedConv.filter.variance(squeezingAxes: axes) + 1e-16)) + return standardizedConv(input) + } + +} + +/// A standardized convolution and group norm layer as specified in the BigTransfer architecture +public struct ConvGNV2BiT: Layer { + public var conv: StandardizedConv2D + public var norm: GroupNorm + @noDerivative public var isSecond: Bool + + public init( + inFilters: Int, + outFilters: Int, + kernelSize: Int = 1, + stride: Int = 1, + padding: Padding = .valid, + isSecond: Bool = false + ) { + self.conv = StandardizedConv2D( + filterShape: (kernelSize, kernelSize, inFilters, outFilters), + strides: (stride, stride), + padding: padding, + useBias: false) + self.norm = GroupNorm( + offset: Tensor(zeros: [inFilters]), + scale: Tensor(zeros: [inFilters]), + groupCount: 2, + axis: -1, + epsilon: 0.001) + self.isSecond = isSecond + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + var normResult = norm(input) + if self.isSecond { + normResult = normResult.padded(forSizes: paddingFromKernelSize(kernelSize: 3)) + } + let reluResult = relu(normResult) + let convResult = conv(reluResult) + return convResult + } +} + +/// The shortcut in a residual block with standardized convolution and group normalization +public struct ShortcutBiT: Layer { + public var projection: StandardizedConv2D + public var norm: GroupNorm + @noDerivative public let needsProjection: Bool + + public init(inFilters: Int, outFilters: Int, stride: Int) { + needsProjection = (stride > 1 || inFilters != outFilters) + norm = GroupNorm( + offset: Tensor(zeros: [needsProjection ? inFilters : 1]), + scale: Tensor(zeros: [needsProjection ? inFilters : 1]), + groupCount: needsProjection ? 2 : 1, + axis: -1, + epsilon: 0.001) + + projection = StandardizedConv2D( + filterShape: (1, 1, needsProjection ? inFilters : 1, needsProjection ? 
outFilters : 1), + strides: (stride, stride), + padding: .valid, + useBias: false) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + var res = input + if needsProjection { + res = norm(res) + res = relu(res) + res = projection(res) + } + return res + } +} + +/// Residual block for BigTransfer with standardized convolution and group normalization layers +public struct ResidualBlockBiT: Layer { + public var shortcut: ShortcutBiT + public var convs: [ConvGNV2BiT] + + public init(inFilters: Int, outFilters: Int, stride: Int, expansion: Int){ + if expansion == 1 { + convs = [ + ConvGNV2BiT(inFilters: inFilters, outFilters: outFilters, kernelSize: 3, stride: stride), + ConvGNV2BiT(inFilters: outFilters, outFilters: outFilters, kernelSize: 3, isSecond: true) + ] + } else { + convs = [ + ConvGNV2BiT(inFilters: inFilters, outFilters: outFilters/4), + ConvGNV2BiT(inFilters: outFilters/4, outFilters: outFilters/4, kernelSize: 3, stride: stride, isSecond: true), + ConvGNV2BiT(inFilters: outFilters/4, outFilters: outFilters) + ] + } + shortcut = ShortcutBiT(inFilters: inFilters, outFilters: outFilters, stride: stride) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + let convResult = convs.differentiableReduce(input) { $1($0) } + return convResult + shortcut(input) + } +} + +/// An implementation of the BigTransfer architecture with variable sizes +public struct BigTransfer: Layer { + public var inputStem: StandardizedConv2D + public var maxPool: MaxPool2D + public var residualBlocks: [ResidualBlockBiT] = [] + public var groupNorm : GroupNorm + public var flatten = Flatten() + public var classifier: Dense + public var avgPool = GlobalAvgPool2D() + @noDerivative public var finalOutFilter : Int = 0 + + /// Initialize the BigTransfer Model + /// + /// - Parameters: + /// - classCount: the number of output classes + /// - depth: the specified depht of the network based on the various ResNet architectures + /// - inputChannels: the number of input channels for the dataset + /// - stemFilters: the number of filters in the first three convolutions + public init( + classCount: Int, + depth: Depth, + inputChannels: Int = 3, + modelName: String = "BiT-M-R50x1", + loadWeights: Bool = true + ) { + + self.inputStem = StandardizedConv2D(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .valid, useBias: false) + self.maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2), padding: .valid) + let sizes = [64 / depth.expansion, 64, 128, 256, 512] + for (iBlock, nBlocks) in depth.layerBlockSizes.enumerated() { + let (nIn, nOut) = (sizes[iBlock] * depth.expansion, sizes[iBlock+1] * depth.expansion) + for j in 0..( + offset: Tensor(zeros: [self.finalOutFilter]), + scale: Tensor(zeros: [self.finalOutFilter]), + groupCount: 2, + axis: -1, + epsilon: 0.001) + self.classifier = Dense(inputSize: 512 * depth.expansion, outputSize: classCount) + + if loadWeights { + let weightsArray = getPretrainedWeightsDict(modelName: modelName) + + // Load weights from model .npz file into the BigTransfer model + let convs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && !(key.name.contains("proj"))} + + var k = 0 + for (idx, i) in self.residualBlocks.enumerated() { + for (jdx, _) in i.convs.enumerated() { + assert(self.residualBlocks[idx].convs[jdx].conv.conv.filter.shape == convs[k].layer.shape) + self.residualBlocks[idx].convs[jdx].conv.conv.filter = convs[k].layer + k = k + 1 + } + } + + let 
projectiveConvs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && (key.name.contains("proj"))} + var normScale = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/gamma")} + var normOffset = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/beta")} + + k = 0 + for (idx, i) in self.residualBlocks.enumerated() { + if (i.shortcut.projection.conv.filter.shape != [1, 1, 1, 1]) + { + assert(self.residualBlocks[idx].shortcut.projection.conv.filter.shape == projectiveConvs[k].layer.shape) + self.residualBlocks[idx].shortcut.projection.conv.filter = projectiveConvs[k].layer + + assert(self.residualBlocks[idx].shortcut.norm.scale.shape == normScale[k].layer.shape) + self.residualBlocks[idx].shortcut.norm.scale = normScale[k].layer + + assert(self.residualBlocks[idx].shortcut.norm.offset.shape == normOffset[k].layer.shape) + self.residualBlocks[idx].shortcut.norm.offset = normOffset[k].layer + k = k + 1 + } + } + + normScale = weightsArray.filter {key in return key.name.contains("gamma")} + k = 0 + for (idx, i) in self.residualBlocks.enumerated() { + for (jdx, _) in i.convs.enumerated() { + assert(normScale[k].layer.shape == self.residualBlocks[idx].convs[jdx].norm.scale.shape) + self.residualBlocks[idx].convs[jdx].norm.scale = normScale[k].layer + k = k + 1 + } + } + + normOffset = weightsArray.filter {key in return key.name.contains("beta")} + + var l = 0 + for (idx, i) in self.residualBlocks.enumerated() { + for (jdx, _) in i.convs.enumerated() { + assert(normOffset[l].layer.shape == self.residualBlocks[idx].convs[jdx].norm.offset.shape) + self.residualBlocks[idx].convs[jdx].norm.offset = normOffset[l].layer + l = l + 1 + } + } + + assert(self.groupNorm.scale.shape == normScale[k].layer.shape) + self.groupNorm.scale = normScale[k].layer + assert(self.groupNorm.offset.shape == normOffset[l].layer.shape) + self.groupNorm.offset = normOffset[l].layer + + let rootConvs = weightsArray.filter {key in return key.name.contains("root_block")} + assert(self.inputStem.conv.filter.shape == rootConvs[0].layer.shape) + self.inputStem.conv.filter = rootConvs[0].layer + } + } + + @differentiable(wrt: imageBatch) + public func classify(_ imageBatch: Tensor) -> Tensor { + var grayscale = Tensor(imageBatch.unstacked(alongAxis: 3)[0]) + var rgbImages = Tensor(stacking: [grayscale, grayscale, grayscale], alongAxis: 3) + + var resizeSize = getResolution(originalResolution: (40, 70)) + var resized = resize(images: rgbImages, size: (resizeSize.0, resizeSize.1)) + return Tensor(callAsFunction(resized)) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + var paddedInput = input.padded(forSizes: paddingFromKernelSize(kernelSize: 7)) + paddedInput = inputStem(paddedInput).padded(forSizes: paddingFromKernelSize(kernelSize: 3)) + let inputLayer = maxPool(paddedInput) + let blocksReduced = residualBlocks.differentiableReduce(inputLayer) { $1($0) } + let normalized = relu(groupNorm(blocksReduced)) + return normalized.sequenced(through: avgPool, flatten, classifier) + } + + public func save(new_path: String = "new_weights", path: String = "BiT-M-R50x1") { + var weightsArray = getPretrainedWeightsDict(modelName: path) + + // Load weights from model .npz file into the BigTransfer model + var convs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && !(key.name.contains("proj"))} + var k = 0 + for (idx, i) in 
self.residualBlocks.enumerated() {
+      for (jdx, _) in i.convs.enumerated() {
+        assert(self.residualBlocks[idx].convs[jdx].conv.conv.filter.shape == convs[k].layer.shape)
+        convs[k].layer = self.residualBlocks[idx].convs[jdx].conv.conv.filter
+        k = k + 1
+      }
+    }
+
+    var projectiveConvs = weightsArray.filter { key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && (key.name.contains("proj")) }
+    var normScale = weightsArray.filter { key in return key.name.contains("unit01/a/group_norm/gamma") }
+    var normOffset = weightsArray.filter { key in return key.name.contains("unit01/a/group_norm/beta") }
+
+    k = 0
+    for (idx, i) in self.residualBlocks.enumerated() {
+      if (i.shortcut.projection.conv.filter.shape != [1, 1, 1, 1])
+      {
+        assert(self.residualBlocks[idx].shortcut.projection.conv.filter.shape == projectiveConvs[k].layer.shape)
+        projectiveConvs[k].layer = self.residualBlocks[idx].shortcut.projection.conv.filter
+        assert(self.residualBlocks[idx].shortcut.norm.scale.shape == normScale[k].layer.shape)
+        normScale[k].layer = self.residualBlocks[idx].shortcut.norm.scale
+
+        assert(self.residualBlocks[idx].shortcut.norm.offset.shape == normOffset[k].layer.shape)
+        normOffset[k].layer = self.residualBlocks[idx].shortcut.norm.offset
+        k = k + 1
+      }
+    }
+
+    var gammaNormScale = weightsArray.filter { key in return key.name.contains("gamma") }
+    k = 0
+    for (idx, i) in self.residualBlocks.enumerated() {
+      for (jdx, _) in i.convs.enumerated() {
+        assert(gammaNormScale[k].layer.shape == self.residualBlocks[idx].convs[jdx].norm.scale.shape)
+        gammaNormScale[k].layer = self.residualBlocks[idx].convs[jdx].norm.scale
+        k = k + 1
+      }
+    }
+
+    var betaNormOffset = weightsArray.filter { key in return key.name.contains("beta") }
+
+    var l = 0
+    for (idx, i) in self.residualBlocks.enumerated() {
+      for (jdx, _) in i.convs.enumerated() {
+        assert(betaNormOffset[l].layer.shape == self.residualBlocks[idx].convs[jdx].norm.offset.shape)
+        betaNormOffset[l].layer = self.residualBlocks[idx].convs[jdx].norm.offset
+        l = l + 1
+      }
+    }
+
+    assert(self.groupNorm.scale.shape == gammaNormScale[k].layer.shape)
+    gammaNormScale[k].layer = self.groupNorm.scale
+    assert(self.groupNorm.offset.shape == betaNormOffset[l].layer.shape)
+    betaNormOffset[l].layer = self.groupNorm.offset
+
+    var rootConvs = weightsArray.filter { key in return key.name.contains("root_block") }
+    assert(self.inputStem.conv.filter.shape == rootConvs[0].layer.shape)
+    rootConvs[0].layer = self.inputStem.conv.filter
+
+    let newWeights = convs + projectiveConvs + normScale + normOffset + gammaNormScale + betaNormOffset
+
+    var weightDict: [String: Tensor<Float>] = [rootConvs[0].name: rootConvs[0].layer]
+
+    for weight in newWeights {
+      weightDict[weight.name] = weight.layer
+    }
+
+    let np = Python.import("numpy")
+    let zipfile = Python.import("zipfile")
+    let format = Python.import("numpy.lib.format")
+    let compat = Python.import("numpy.compat")
+    let file = compat.os_fspath(new_path + ".npz")
+    let zipf = zipfile.ZipFile(file, mode: "w", compression: zipfile.ZIP_STORED, allowZip64: true)
+
+    for weight in weightsArray {
+      let fname = weight.name + ".npy"
+      // Prefer the updated weight when this layer was modified above; otherwise
+      // fall back to the original pretrained value.
+      var val = np.asanyarray(weight.layer.makeNumpyArray())
+      if let updated = weightDict[weight.name] {
+        val = updated.makeNumpyArray()
+      }
+      let file_id = zipf.open(fname, "w", force_zip64: true)
+      format.write_array(file_id, val, allow_pickle: true)
+    }
+    // Finalize the archive so that the central directory is written.
+    zipf.close()
+  }
+}
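+// Editor's usage sketch (not part of the original PR; `patchBatch` is a hypothetical
+// [N, 40, 70, 1] grayscale batch). Constructing the model fetches the pretrained BiT
+// weights via wget when loadWeights is true:
+//   var model = BigTransfer(classCount: 2, depth: .resNet50)
+//   let logits = model.classify(patchBatch)     // resizes input per the BiT-HyperRule
+//   model.save(new_path: "bit_finetuned")       // writes bit_finetuned.npz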
+extension BigTransfer {
+  public enum Depth {
+    case resNet18
+    case resNet34
+    case resNet50
+    case resNet101
+    case resNet152
+
+    var expansion: Int {
+      switch self {
+      case .resNet18, .resNet34: return 1
+      default: return 4
+      }
+    }
+
+    var layerBlockSizes: [Int] {
+      switch self {
+      case .resNet18: return [2, 2, 2, 2]
+      case .resNet34: return [3, 4, 6, 3]
+      case .resNet50: return [3, 4, 6, 3]
+      case .resNet101: return [3, 4, 23, 3]
+      case .resNet152: return [3, 8, 36, 3]
+      }
+    }
+  }
+}
+
+extension BigTransfer: Classifier {}
\ No newline at end of file
diff --git a/Sources/BeeTracking/NNClassifier.swift b/Sources/BeeTracking/NNClassifier.swift
new file mode 100644
index 00000000..cf49f0b7
--- /dev/null
+++ b/Sources/BeeTracking/NNClassifier.swift
@@ -0,0 +1,607 @@
+// Copyright 2020 The SwiftFusion Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import SwiftFusion
+import TensorFlow
+import PythonKit
+import BeeDataset
+
+// MARK: - The neural network patch classifier
+/// A small convolutional network that classifies an image patch as foreground (bee)
+/// or background; used as the appearance model of the tracking factor.
+
+public struct BeeBatch {
+  let patch: Tensor<Double>
+  let label: Tensor<Double>
+}
+/// Conform `BeeBatch` to `Collatable` so that we can load it into a `TrainingEpoch`.
+extension BeeBatch: Collatable {
+  public init<BatchSamples: Collection>(collating samples: BatchSamples)
+  where BatchSamples.Element == Self {
+    patch = Tensor(stacking: samples.map { $0.patch })
+    label = Tensor(stacking: samples.map { $0.label })
+  }
+}
+
+/// The encoder layers follow the RAE encoder of [1].
+/// [1] https://openreview.net/forum?id=S1g7tpEYDS
+public struct NNClassifier: Layer {
+  /// The height of the input image in pixels.
+  @noDerivative public let imageHeight: Int
+
+  /// The width of the input image in pixels.
+  @noDerivative public let imageWidth: Int
+
+  /// The number of channels in the input image.
+  @noDerivative public let imageChannels: Int
+
+  /// The number of activations in the hidden layer.
+  @noDerivative public let hiddenDimension: Int
+
+  /// The number of activations in the appearance code.
+  @noDerivative public let latentDimension: Int
+
+  /// First conv to downsample the image
+  public var encoder_conv1: Conv2D<Double>
+
+  /// Max pooling of factor 2
+  var encoder_pool1: MaxPool2D<Double>
+
+  /// First FCN encoding layer goes from image to hidden dimension
+  public var encoder1: Dense<Double>
+
+  /// Second goes from dense features to latent code
+  public var encoder2: Dense<Double>
+
+  /// Third goes from latent code to the two class logits
+  public var encoder3: Dense<Double>
+
+  /// Creates an instance for images with size `[imageHeight, imageWidth, imageChannels]`, with
+  /// hidden and latent dimensions given by `hiddenDimension` and `latentDimension`.
+  public init(
+    imageHeight: Int, imageWidth: Int, imageChannels: Int,
+    hiddenDimension: Int, latentDimension: Int
+  ) {
+    self.imageHeight = imageHeight
+    self.imageWidth = imageWidth
+    self.imageChannels = imageChannels
+    self.hiddenDimension = hiddenDimension
+    self.latentDimension = latentDimension
+
+    encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu)
+
+    encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same)
+
+    encoder1 = Dense(
+      inputSize: imageHeight * imageWidth * imageChannels / 4,
+      outputSize: hiddenDimension,
+      activation: relu)
+
+    encoder2 = Dense(
+      inputSize: hiddenDimension,
+      outputSize: latentDimension,
+      activation: relu)
+
+    encoder3 = Dense(
+      inputSize: latentDimension,
+      outputSize: 2)
+  }
+
+  /// Hyperparameters for training from an image batch.
+  public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String, learningRate: Float)
+
+  /// Initializes by training on a batch of labeled patches, optionally starting
+  /// from pretrained weights when `train_mode == "pretrained"`.
+  public init(patches: Tensor<Double>, labels: Tensor<Double>, given parameters: HyperParameters? = nil, train_mode: String) {
+    print("init from image batch")
+    let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1)
+    let h = parameters!.hiddenDimension
+    let d = parameters!.latentDimension
+    var model = NNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_,
+                             hiddenDimension: h, latentDimension: d)
+    if train_mode == "pretrained" {
+      print("PRETRAINED")
+      let np = Python.import("numpy")
+      print("loading pretrained weights")
+      model.load(weights: np.load(parameters!.weightFile, allow_pickle: true))
+    }
+
+    let optimizer = Adam(for: model)
+    optimizer.learningRate = parameters!.learningRate
+
+    let lossFunc = NNClassifierLoss()
+    // Issues I came across: the TrainingEpochs function was scrambling the order,
+    // and the map function was too slow during training.
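+    // Editor's note: what follows is a standard Swift for TensorFlow training loop.
+    // TrainingEpochs reshuffles the samples each epoch; every batch is collated into
+    // a BeeBatch, and one Adam step is taken per batch on the cross-entropy loss.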
+
+    // Thread-local variable that model layers read to know their mode
+    Context.local.learningPhase = .training
+
+    let trainingData: [BeeBatch] = zip(patches.unstacked(), labels.unstacked()).map { BeeBatch(patch: $0.0, label: $0.1) }
+
+    // An infinite sequence of training epochs; each epoch reshuffles the samples.
+    let epochs = TrainingEpochs(samples: trainingData, batchSize: 200)
+    var trainLossResults: [Double] = []
+    let epochCount = 100
+    for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() {
+      var epochLoss: Double = 0
+      var batchCount: Int = 0
+      for batchSamples in epoch {
+        let batch = batchSamples.collated
+        let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) }
+        optimizer.update(&model, along: grad)
+        epochLoss += loss.scalarized()
+        batchCount += 1
+      }
+      epochLoss /= Double(batchCount)
+      trainLossResults.append(epochLoss)
+      if epochIndex % 5 == 0 {
+        print("\nEpoch \(epochIndex):", terminator: "")
+      }
+      print(" \(epochLoss),", terminator: "")
+    }
+
+    self = model
+  }
+
+  /// Differentiable classifier: maps a batch of patches to a batch of class logits.
+  @differentiable(wrt: imageBatch)
+  public func classify(_ imageBatch: Tensor<Double>) -> Tensor<Double> {
+    let batchSize = imageBatch.shape[0]
+    let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels]
+    precondition(
+      imageBatch.shape == expectedShape,
+      "input shape is \(imageBatch.shape), but expected \(expectedShape)")
+    return imageBatch
+      .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4])
+      .sequenced(through: encoder1, encoder2, encoder3)
+  }
+
+  /// Standard: add syntactic sugar to apply model as a function call.
+  @differentiable
+  public func callAsFunction(_ imageBatch: Tensor<Double>) -> Tensor<Double> {
+    let output = classify(imageBatch)
+    return output
+  }
+}
+
+/// The loss function for the `NNClassifier`.
+public struct NNClassifierLoss {
+  /// Returns the loss of `model` on `imageBatch`.
+  /// Parameter printLoss: Whether to print the loss and its components.
+  @differentiable
+  public func callAsFunction(
+    _ model: NNClassifier, _ imageBatch: BeeBatch, printLoss: Bool = false
+  ) -> Tensor<Double> {
+    let output = model(imageBatch.patch)
+    let totalLoss = softmaxCrossEntropy(logits: output, labels: Tensor<Int32>(imageBatch.label))
+    return totalLoss
+  }
+}
+
+extension NNClassifier: Classifier {}
+
+public struct PretrainedNNClassifier: Classifier {
+  public var inner: NNClassifier
+
+  /// Constructor that does training of the network
+  public init(patches: Tensor<Double>, labels: Tensor<Double>, given: HyperParameters, train_mode: String) {
+    inner = NNClassifier(
+      patches: patches, labels: labels, given: (given != nil) ?
+ (hiddenDimension: given.hiddenDimension, + latentDimension: given.latentDimension, + weightFile: given.weightFile, + learningRate: given.learningRate) : nil, train_mode: train_mode + ) + + + } + + /// Save the weight to file + public func save(to path: String) { + let np = Python.import("numpy") + np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) + } + + @differentiable + public func classify(_ imageBatch: Tensor) -> Tensor { + inner.classify(imageBatch) + } + + /// Initialize given an image batch + public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String, learningRate: Float) +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +// /// [1] https://openreview.net/forum?id=S1g7tpEYDS +// public struct SmallerNNClassifier: Layer{ +// @noDerivative public let imageHeight: Int +// @noDerivative public let imageWidth: Int +// @noDerivative public let imageChannels: Int +// @noDerivative public let latentDimension: Int +// public var encoder_conv1: Conv2D +// var encoder_pool1: MaxPool2D +// public var encoder1: Dense +// public var encoder2: Dense + +// public init( +// imageHeight: Int, imageWidth: Int, imageChannels: Int, latentDimension: Int +// ) { +// self.imageHeight = imageHeight +// self.imageWidth = imageWidth +// self.imageChannels = imageChannels +// self.latentDimension = latentDimension + +// encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) + +// encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) + +// encoder1 = Dense( +// inputSize: imageHeight * imageWidth * imageChannels / 4, +// outputSize: latentDimension, +// activation: relu) + +// encoder2 = Dense( +// inputSize: latentDimension, +// outputSize: 2) + +// } + +// /// Initialize given an image batch +// public init(patches patches: Tensor, labels labels: Tensor, given latentDimension: Int? = nil) { +// print("init from image batch") +// let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) +// let d = latentDimension ?? 
10 +// var model = SmallerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, latentDimension: d) + +// let optimizer = Adam(for: model) +// optimizer.learningRate = 1e-3 + +// let lossFunc = NNClassifierLoss() +// Context.local.learningPhase = .training +// let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) +// let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array +// var trainLossResults: [Double] = [] +// let epochCount = 600 +// for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { +// var epochLoss: Double = 0 +// var batchCount: Int = 0 +// for batchSamples in epoch { +// let batch = batchSamples.collated +// let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } +// optimizer.update(&model, along: grad) +// epochLoss += loss.scalarized() +// batchCount += 1 +// } +// epochLoss /= Double(batchCount) +// trainLossResults.append(epochLoss) +// // if epochIndex % 50 == 0 { +// print("Epoch \(epochIndex): Loss: \(epochLoss)") +// // } +// } + +// self = model +// } + +// /// Differentiable encoder +// @differentiable(wrt: imageBatch) +// public func classify(_ imageBatch: Tensor) -> Tensor { +// let batchSize = imageBatch.shape[0] +// let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] +// precondition( +// imageBatch.shape == expectedShape, +// "input shape is \(imageBatch.shape), but expected \(expectedShape)") +// return imageBatch +// .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) +// .sequenced(through: encoder1, encoder2) +// } + +// /// Standard: add syntactic sugar to apply model as a function call. 
+// @differentiable +// public func callAsFunction(_ imageBatch: Tensor) -> Tensor { +// let output = classify(imageBatch) +// return output +// } +// } + +// public struct LargerNNClassifier: Layer{ +// @noDerivative public let imageHeight: Int +// @noDerivative public let imageWidth: Int +// @noDerivative public let imageChannels: Int +// @noDerivative public let hiddenDimension: Int +// @noDerivative public let latentDimension: Int +// public var encoder_conv1: Conv2D +// var encoder_pool1: MaxPool2D +// public var encoder1: Dense +// public var encoder2: Dense +// public var encoder3: Dense +// public var encoder4: Dense +// public init( +// imageHeight: Int, imageWidth: Int, imageChannels: Int, +// hiddenDimension: Int, latentDimension: Int +// ) { +// self.imageHeight = imageHeight +// self.imageWidth = imageWidth +// self.imageChannels = imageChannels +// self.hiddenDimension = hiddenDimension +// self.latentDimension = latentDimension + +// encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) + +// encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) + +// encoder1 = Dense( +// inputSize: imageHeight * imageWidth * imageChannels / 4, +// outputSize: hiddenDimension, +// activation: relu) + +// encoder2 = Dense( +// inputSize: hiddenDimension, +// outputSize: hiddenDimension, +// activation: relu) + +// encoder3 = Dense( +// inputSize: hiddenDimension, +// outputSize: latentDimension, +// activation: relu) + +// encoder4 = Dense( +// inputSize: latentDimension, +// outputSize: 2) + +// } + +// /// Initialize given an image batch +// public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int) +// // public init(from imageBatch: Tensor, given parameters: HyperParameters? = nil) { +// public init(patches patches: Tensor, labels labels: Tensor, given parameters: HyperParameters? = nil) { +// print("init from image batch") +// let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) +// let (h,d) = parameters ?? 
(100,10) +// var model = LargerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, +// hiddenDimension: h, latentDimension: d) +// let optimizer = Adam(for: model) +// optimizer.learningRate = 1e-3 +// let lossFunc = NNClassifierLoss() +// Context.local.learningPhase = .training +// let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) +// let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array +// // +// var trainLossResults: [Double] = [] +// let epochCount = 600 +// for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { +// var epochLoss: Double = 0 +// var batchCount: Int = 0 +// for batchSamples in epoch { +// let batch = batchSamples.collated +// let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } +// optimizer.update(&model, along: grad) +// epochLoss += loss.scalarized() +// batchCount += 1 +// } +// epochLoss /= Double(batchCount) +// trainLossResults.append(epochLoss) +// if epochIndex % 5 == 0 { +// print("\nEpoch \(epochIndex):", terminator:"") +// } +// print(" \(epochLoss),", terminator: "") +// } + +// // if NSFileManager.fileExistsAtPath(path) { +// // print("File exists") +// // } else { +// // print("File does not exist") +// // } +// // np.save("epochloss\()", Tensor(trainLossResults).makeNumpyArray()) + +// self = model +// } + +// /// Differentiable encoder +// @differentiable(wrt: imageBatch) +// public func classify(_ imageBatch: Tensor) -> Tensor { +// let batchSize = imageBatch.shape[0] +// let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] +// precondition( +// imageBatch.shape == expectedShape, +// "input shape is \(imageBatch.shape), but expected \(expectedShape)") +// return imageBatch +// .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) +// .sequenced(through: encoder1, encoder2, encoder3, encoder4) +// } + +// /// Standard: add syntactic sugar to apply model as a function call. +// @differentiable +// public func callAsFunction(_ imageBatch: Tensor) -> Tensor { +// let output = classify(imageBatch) +// return output +// } +// } + + + + + + +// public struct PretrainedSmallerNNClassifier : Classifier{ +// public var inner: SmallerNNClassifier + +// /// The constructor that only does loading of the pretrained weights. +// public init(from imageBatch: Tensor, given: HyperParameters?) { +// let shape = imageBatch.shape +// precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") +// let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) +// if let params = given { +// var encoder = SmallerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: params.latentDimension +// ) + +// let np = Python.import("numpy") + +// encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) +// inner = encoder +// } else { +// inner = SmallerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: 1 +// ) +// fatalError("Must provide hyperparameters to pretrained network") +// } +// } + +// /// Constructor that does training of the network +// public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { +// inner = SmallerNNClassifier( +// patches: patches, labels: labels, given: (given != nil) ? 
(given!.latentDimension) : nil +// ) +// } + +// /// Save the weight to file +// public func save(to path: String) { +// let np = Python.import("numpy") +// np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) +// } + +// @differentiable +// public func classify(_ imageBatch: Tensor) -> Tensor { +// inner.classify(imageBatch) +// } + + +// /// Initialize given an image batch +// public typealias HyperParameters = (latentDimension: Int, weightFile: String) +// } + + + + + + +// public struct PretrainedLargerNNClassifier : Classifier{ +// public var inner: LargerNNClassifier + +// /// The constructor that only does loading of the pretrained weights. +// public init(from imageBatch: Tensor, given: HyperParameters?) { +// let shape = imageBatch.shape +// precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") +// let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) +// if let params = given { +// var encoder = LargerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, +// hiddenDimension: params.hiddenDimension, latentDimension: params.latentDimension +// ) + +// let np = Python.import("numpy") + +// encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) +// inner = encoder +// } else { +// inner = LargerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, +// hiddenDimension: 1, latentDimension: 1 +// ) +// fatalError("Must provide hyperparameters to pretrained network") +// } +// } + +// /// Constructor that does training of the network +// public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { +// inner = LargerNNClassifier( +// patches: patches, labels: labels, given: (given != nil) ? (hiddenDimension: given!.hiddenDimension, latentDimension: given!.latentDimension) : nil +// ) +// } + +// /// Save the weight to file +// public func save(to path: String) { +// let np = Python.import("numpy") +// np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) +// } + +// @differentiable +// public func classify(_ imageBatch: Tensor) -> Tensor { +// inner.classify(imageBatch) +// } + + +// /// Initialize given an image batch +// public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String) +// } \ No newline at end of file diff --git a/Sources/BeeTracking/OISTBeeVideo+Batches.swift b/Sources/BeeTracking/OISTBeeVideo+Batches.swift index 399eced7..5b7ac722 100644 --- a/Sources/BeeTracking/OISTBeeVideo+Batches.swift +++ b/Sources/BeeTracking/OISTBeeVideo+Batches.swift @@ -86,7 +86,7 @@ extension OISTBeeVideo { /// Anything not completely overlapping labels var deterministicEntropy = ARC4RandomNumberGenerator(seed: 42) let frames = self.randomFrames(self.frames.count, using: &deterministicEntropy) - + // We need `batchSize / frames.count` patches from each frame, plus the remainder of the // integer division. 
   var patchesPerFrame = Array(repeating: batchSize / frames.count, count: frames.count)
@@ -103,7 +103,6 @@ extension OISTBeeVideo {
           rows: patchSize.0, cols: patchSize.1))
     }
   }
-
   return obbs
 }
diff --git a/Sources/BeeTracking/ProbabilisticTracker.swift b/Sources/BeeTracking/ProbabilisticTracker.swift
index 24231d77..f1ae8938 100644
--- a/Sources/BeeTracking/ProbabilisticTracker.swift
+++ b/Sources/BeeTracking/ProbabilisticTracker.swift
@@ -149,12 +149,12 @@ public func trainProbabilisticTracker(
     bgRandomFrameCount: bgRandomFrameCount,
     useCache: true
   )
-
   let batchPositive = encoder.encode(fg)
   let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3)
+
   let batchNegative = encoder.encode(bg)
-  let backgroundModel = GaussianNB(from: batchNegative, regularizer: 1e-3)
+  let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3)

   let tracker = makeProbabilisticTracker(
     model: encoder,
@@ -207,7 +207,7 @@ public func makeProbabilisticTracker<
       appearanceModelSize: targetSize,
       foregroundModel: foregroundModel,
       backgroundModel: backgroundModel,
-      maxPossibleNegativity: 1e4
+      maxPossibleNegativity: 1e7
     )
   )
 }
diff --git a/Sources/BeeTracking/ProbabilisticTracker2.swift b/Sources/BeeTracking/ProbabilisticTracker2.swift
new file mode 100644
index 00000000..dc2921e3
--- /dev/null
+++ b/Sources/BeeTracking/ProbabilisticTracker2.swift
@@ -0,0 +1,72 @@
+import BeeDataset
+import PenguinStructures
+import SwiftFusion
+import TensorFlow
+import PythonKit
+import Foundation
+
+/// Returns a tracking configuration for a tracker using a classifier.
+///
+/// - Parameter model: The classifier model to use.
+/// - Parameter frames: The frames of the video where we want to run tracking.
+/// - Parameter targetSize: The size of the target in the frames.
+public func makeProbabilisticTracker2<
+  MyClassifier: Classifier
+>(
+  model: MyClassifier,
+  frames: [Tensor<Float>],
+  targetSize: (Int, Int)
+) -> TrackingConfiguration<Tuple1<Pose2>> {
+  var variableTemplate = VariableAssignments()
+  var frameVariableIDs = [Tuple1<TypedID<Pose2>>]()
+  for _ in 0..<frames.count {
+    frameVariableIDs.append(Tuple1(variableTemplate.store(Pose2())))
+  }
+
+  let addPrior = { (variables: Tuple1<TypedID<Pose2>>, values: Tuple1<Pose2>, graph: inout FactorGraph) -> () in
+    let (poseID) = unpack(variables)
+    let (pose) = unpack(values)
+    graph.store(WeightedPriorFactorPose2(poseID, pose, weight: 1e-2, rotWeight: 2e2))
+  }
+
+  let addTrackingFactor = { (variables: Tuple1<TypedID<Pose2>>, frame: Tensor<Float>, graph: inout FactorGraph) -> () in
+    let (poseID) = unpack(variables)
+    graph.store(
+      ProbablisticTrackingFactor2(poseID,
+        measurement: frame,
+        classifier: model,
+        patchSize: targetSize,
+        appearanceModelSize: targetSize
+      )
+    )
+  }
+
+  return TrackingConfiguration(
+    frames: frames,
+    variableTemplate: variableTemplate,
+    frameVariableIDs: frameVariableIDs,
+    addPriorFactor: addPrior,
+    addTrackingFactor: addTrackingFactor,
+    addBetweenFactor: { (variables1, variables2, graph) -> () in
+      let (poseID1) = unpack(variables1)
+      let (poseID2) = unpack(variables2)
+      graph.store(WeightedBetweenFactorPose2(poseID1, poseID2, Pose2(), weight: 1e-2, rotWeight: 2e2))
+    },
+    addFixedBetweenFactor: { (values, variables, graph) -> () in
+      let (prior) = unpack(values)
+      let (poseID) = unpack(variables)
+      graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 8, sdTheta:0.4))
+    })
+}
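For orientation, a hypothetical driver sketch (not part of this diff): `classifier`, `frames`, and `startPose` are stand-in names, and `infer(knownStart:)` / `frameVariableIDs` are assumed to work the same way as with the existing `makeProbabilisticTracker`.

var tracker = makeProbabilisticTracker2(
  model: classifier,      // any type conforming to the new `Classifier` protocol
  frames: frames,         // [Tensor<Float>] video frames
  targetSize: (40, 70))   // bee patch size used throughout BeeTracking
let prediction = tracker.infer(knownStart: Tuple1(startPose))
// Turn the per-frame pose estimates into oriented bounding boxes.
let boxes = tracker.frameVariableIDs.map { ids in
  OrientedBoundingBox(center: prediction[ids.head], rows: 40, cols: 70)
}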
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+  return (t.head)
+}
\ No newline at end of file
diff --git a/Sources/BeeTracking/TrackingFactorGraph.swift b/Sources/BeeTracking/TrackingFactorGraph.swift
index 5cfeb9e1..89dd760e 100644
--- a/Sources/BeeTracking/TrackingFactorGraph.swift
+++ b/Sources/BeeTracking/TrackingFactorGraph.swift
@@ -134,7 +134,8 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
   ) -> ()

   /// The optimizer to use during inference.
-  public var optimizer = LM()
+  // public var optimizer = LM()
+  public var optimizer = GradientDescent(learningRate: 1e-3)

   /// Creates an instance.
   ///
@@ -172,6 +173,7 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
     self.addBetweenFactor = addBetweenFactor
     self.addFixedBetweenFactor = addFixedBetweenFactor!

+    // For LM
     self.optimizer.precision = 1e-1
     self.optimizer.max_iteration = 100
     self.optimizer.cgls_precision = 1e-5
@@ -194,16 +196,16 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
     // First get pose IDs: pose is assumed to be first variable in the frameVariableID tuple
     let currentPoseID = (frameVariableIDs[i + 1] as! Tuple1<TypedID<Pose2>>).head
     let previousPoseID = (frameVariableIDs[i] as! Tuple1<TypedID<Pose2>>).head
-
     // Remember best pose
     var bestPose = x[currentPoseID]
-
     // Sample from motion model and take best pose
     var bestError = g.error(at: x)
-    for _ in 0..<2000 {
+
+    for _ in 0..<256 {
       x[currentPoseID] = x[previousPoseID]
-      x[currentPoseID].perturbWith(stddev: Vector3(0.3, 8, 4.6))
+      x[currentPoseID].perturbWith(stddev: Vector3(0.2, 8, 8))
       let candidateError = g.error(at: x)
+
       if candidateError < bestError {
         bestError = candidateError
         bestPose = x[currentPoseID]
@@ -212,20 +214,19 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
     x[currentPoseID] = bestPose
   }
+
   /// Extend the track
   mutating func extendTrack(x: inout VariableAssignments, fromFrame i: Int,
     withSampling samplingFlag: Bool = false
   ) {
     let currentVarID = frameVariableIDs[i + 1]
     let previousVarID = frameVariableIDs[i]
-
     // Create a tracking factor graph on just the `i+1`-th variable.
     var g = graph(on: (i + 1)..<(i + 2))

     // The `i`-th variable is already initialized well, so add a prior factor that it stays
     // near its current position.
     addFixedBetweenFactor(x[previousVarID], currentVarID, &g)
-
     // Initialize
     if (samplingFlag) {
       // Try to initialize pose of the `i+1`-th variable by sampling
@@ -256,7 +257,6 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
     }

     // TODO: We could also do a final optimization on all the variables jointly here.
-
     return x
   }
 }
diff --git a/Sources/BeeTracking/TrackingMetrics.swift b/Sources/BeeTracking/TrackingMetrics.swift
index 3413a47e..2cdfb529 100644
--- a/Sources/BeeTracking/TrackingMetrics.swift
+++ b/Sources/BeeTracking/TrackingMetrics.swift
@@ -42,7 +42,7 @@ public struct SubsequenceMetrics: Codable {
     // Find the first failure frame.
     var NFsa = prediction.count
     for (index, overlap) in overlaps.enumerated() {
-      if overlap < 0.1 {
+      if overlap < 0.05 {
         NFsa = index
         break
       }
@@ -194,6 +194,7 @@ extension TrackerEvaluationDataset {
       print("Evaluating sequence \(i + 1) of \(sequenceCount)")
       return sequence.evaluate(tracker, deltaAnchor: deltaAnchor, outputFile: "\(outputFile)-sequence\(i)")
     }
+
     let result = TrackerEvaluationResults(
       sequences: sequenceEvaluations,
       trackerMetrics: TrackerMetrics(sequenceEvaluations.map { $0.sequenceMetrics }),
@@ -252,10 +253,13 @@ extension TrackerEvaluationSequence {
         let subsequence = subsequences[i]
         print("Evaluating subsequence \(i + 1) of \(subsequences.count)")
         (buf.baseAddress! + i).initialize(to: tracker(subsequence.frames, subsequence.groundTruth[0]))
+        }
       }
+      actualCount = subsequences.count
     }
+
     let subsequenceEvaluations = zip(subsequences, subsequencePredictions).map {
       SubsequenceEvaluationResults(
         metrics: SubsequenceMetrics(groundTruth: $0.0.groundTruth, prediction: $0.1),
@@ -263,7 +267,6 @@ extension TrackerEvaluationSequence {
         groundTruth: $0.0.groundTruth,
         frames: $0.0.frames)
     }
-
     let result = SequenceEvaluationResults(
       subsequences: subsequenceEvaluations,
       sequenceMetrics: SequenceMetrics(subsequenceEvaluations.map { $0.metrics }))
@@ -281,7 +284,7 @@ extension TrackerEvaluationDataset {
     for track in video.tracks {
       let sequence = TrackerEvaluationSequence(
         frames: Array(
-          video.frames[track.startFrameIndex..<(track.startFrameIndex + track.boxes.count)]),
+          video.frames[track.startFrameIndex..<(track.boxes.count)]),
         groundTruth: track.boxes)
       sequences.append(sequence)
     }
diff --git a/Sources/BeeTracking/Visualizations.swift b/Sources/BeeTracking/Visualizations.swift
index 7b1fc979..f032db6b 100644
--- a/Sources/BeeTracking/Visualizations.swift
+++ b/Sources/BeeTracking/Visualizations.swift
@@ -60,6 +60,7 @@ public func plotOverlap(metrics: SubsequenceMetrics, on ax: PythonObject) {
   ax.set_title("Overlap")
 }

+
 /// plot Comparison image
 public func plotPatchWithGT(frame: Tensor<Double>, actual: Pose2, expected: Pose2) -> (PythonObject, PythonObject) {
   let plt = Python.import("matplotlib.pyplot")
@@ -77,6 +78,183 @@ public func plotPatchWithGT(frame: Tensor<Double>, actual: Pose2, expected: Pose2
   return (fig, ax)
 }

+public func plotPoseDifference(track: [Pose2], withGroundTruth expected: [Pose2], on ax: PythonObject) {
+  let thetaDiff = zip(track, expected).map{pow(($0.0.rot.theta - $0.1.rot.theta), 2.0)}
+  let posDiff = zip(track, expected).map{pow(($0.0.t.x - $0.1.t.x), 2.0) + pow(($0.0.t.y - $0.1.t.y), 2.0)}
+  ax.plot(thetaDiff, posDiff)
+  ax.set_title("Squared Theta Difference (X-axis) vs. Squared Position Difference (Y-axis)")
+}
+
+public func plotFrameWithPatches(frame: Tensor<Float>, actual: Pose2, expected: Pose2, firstGroundTruth: Pose2) -> (PythonObject, PythonObject) {
+  let plt = Python.import("matplotlib.pyplot")
+  let mpl = Python.import("matplotlib")
+
+  let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2
+  ax.imshow(frame.makeNumpyArray() / 255.0, cmap: "gray")
+  let actualBoundingBox = OrientedBoundingBox(center: actual, rows: 40, cols: 70)
+  ax.plot(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x], actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y], "r-")
+  var supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([actualBoundingBox.center.t.x, actualBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"r",
+    orientation: actualBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+
+  let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70)
+  ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "b-")
+
+  supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([expectedBoundingBox.center.t.x, expectedBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"b",
+    orientation: expectedBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+  ax.set_xlim(expected.t.x - 100, expected.t.x + 100)
+  ax.set_ylim(expected.t.y - 100, expected.t.y + 100)
+
+  ax.title.set_text("Prediction (Red) vs. Ground Truth (Blue)")
+  return (fig, ax)
+}
+
+
+/// plot Comparison image
+public func plotFrameWithPatches2(frame: Tensor<Float>, actual_box1: OrientedBoundingBox, actual_box2: OrientedBoundingBox, expected: Pose2, firstGroundTruth: Pose2) -> (PythonObject, PythonObject) {
+  let plt = Python.import("matplotlib.pyplot")
+  let mpl = Python.import("matplotlib")
+  let (fig, ax) = plt.subplots(1, 2, figsize: Python.tuple([8, 4])).tuple2
+  let np = Python.import("numpy")
+  let fr = np.squeeze(frame.makeNumpyArray())
+  ax[0].imshow(fr / 255.0, cmap: "gray")
+  ax[1].imshow(fr / 255.0, cmap: "gray")
+  ax[0].set_axis_off()
+  ax[1].set_axis_off()
+  let actualBoundingBox = OrientedBoundingBox(center: actual_box1.center, rows: actual_box1.rows, cols: actual_box1.cols)
+  ax[0].plot(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x], actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y], "r-")
+  let supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([actualBoundingBox.center.t.x, actualBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"r",
+    orientation: actualBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax[0].add_patch(supportPatch)
+  ax[0].set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200)
+  ax[0].set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200)
+  ax[0].title.set_text("RAE 256")
+
+  let actualBoundingBox2 = OrientedBoundingBox(center: actual_box2.center, rows: actual_box2.rows, cols: actual_box2.cols)
+  ax[1].plot(actualBoundingBox2.corners.map{$0.x} + [actualBoundingBox2.corners.first!.x], actualBoundingBox2.corners.map{$0.y} + [actualBoundingBox2.corners.first!.y], "r-")
+
+  ax[1].set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200)
+  ax[1].set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200)
+  ax[1].title.set_text("SiamMask")
+
+  return (fig, ax)
+}
+
+
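A minimal usage sketch for the side-by-side comparison plot above (not part of the diff; `frame`, `raeBox`, `siamBox`, and `gt` are hypothetical stand-ins for a video frame, the two trackers' box estimates, and the first ground-truth pose):

let (fig, _) = plotFrameWithPatches2(
  frame: frame, actual_box1: raeBox, actual_box2: siamBox,
  expected: gt, firstGroundTruth: gt)  // gt: Pose2 used to center both 400x400 crops
fig.savefig("rae_vs_siammask.png")     // PythonKit forwards the call to matplotlib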
+public func plotXYandTheta(xs: [Double], ys: [Double], thetas: [Double]) -> (PythonObject, PythonObject) {
+  let plt = Python.import("matplotlib.pyplot")
+  let np = Python.import("numpy")
+
+  let (fig, axs) = plt.subplots(1, 2, figsize: Python.tuple([8, 4])).tuple2
+
+  let ax2 = axs[0]
+  ax2.plot(np.arange(0, xs.count), xs)
+  ax2.plot(np.arange(0, xs.count), ys)
+  ax2.title.set_text("X and Y")
+
+  let ax3 = axs[1]
+  ax3.plot(np.arange(0, xs.count), thetas)
+  ax3.title.set_text("Theta")
+
+  return (fig, axs)
+}
+
+
+/// Plot optimization start and end poses against the label.
+public func plotFrameWithPatches3(frame: Tensor<Float>, start: Pose2, end: Pose2, expected: Pose2, firstGroundTruth: Pose2, errors: [Double], xs: [Double], ys: [Double], thetas: [Double]) -> (PythonObject, PythonObject) {
+  let plt = Python.import("matplotlib.pyplot")
+  let mpl = Python.import("matplotlib")
+  let (fig, axs) = plt.subplots(2, 3, figsize: Python.tuple([18, 10])).tuple2
+  let ax = axs[0][0]
+  let np = Python.import("numpy")
+  let fr = np.squeeze(frame.makeNumpyArray())
+  ax.imshow(fr / 255.0, cmap: "gray")
+  let startBoundingBox = OrientedBoundingBox(center: start, rows: 40, cols: 70)
+  ax.plot(startBoundingBox.corners.map{$0.x} + [startBoundingBox.corners.first!.x], startBoundingBox.corners.map{$0.y} + [startBoundingBox.corners.first!.y], "g-")
+
+  let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70)
+  ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "b-")
+  var supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([expectedBoundingBox.center.t.x, expectedBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"b",
+    orientation: expectedBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+  supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([startBoundingBox.center.t.x, startBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"g",
+    orientation: startBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+
+  let endBoundingBox = OrientedBoundingBox(center: end, rows: 40, cols: 70)
+  ax.plot(endBoundingBox.corners.map{$0.x} + [endBoundingBox.corners.first!.x], endBoundingBox.corners.map{$0.y} + [endBoundingBox.corners.first!.y], "r-")
+  supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([endBoundingBox.center.t.x, endBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"r",
+    orientation: endBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+
+  ax.set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200)
+  ax.set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200)
+  ax.title.set_text("Start (Green), End (Red), vs. Label (Blue)")
+
+  let ax1 = axs[0][1]
+  ax1.plot(np.arange(0, errors.count), errors)
+  ax1.title.set_text("Error value")
+
+  let ax2 = axs[0][2]
+  ax2.plot(np.arange(0, xs.count), xs)
+  ax2.title.set_text("X")
+
+  let ax4 = axs[1][1]
+  ax4.plot(np.arange(0, xs.count), ys)
+  ax4.title.set_text("Y")
+
+  let ax5 = axs[1][2]
+  ax5.plot(np.arange(0, xs.count), thetas)
+  ax5.title.set_text("Theta")
+
+  return (fig, ax)
+}
+
+
 /// Calculate the translation error plane (X-Y)
 public func errorPlaneTranslation<
   Encoder: AppearanceModelEncoder,
diff --git a/Sources/SwiftFusion/Inference/FactorBoilerplate.swift b/Sources/SwiftFusion/Inference/FactorBoilerplate.swift
index 51db2e74..f3d563f3 100644
--- a/Sources/SwiftFusion/Inference/FactorBoilerplate.swift
+++ b/Sources/SwiftFusion/Inference/FactorBoilerplate.swift
@@ -133,6 +133,29 @@ extension LinearizableFactor1 {
 }

+/// A factor, with 1 variable, in a factor graph. Uses a Vector1 error.
+public protocol LinearizableFactor1a: LinearizableFactor, LinearizableFactor1_
+  where Variables == Tuple1<V0>, LinearizableComponent == Self {}
+
+extension LinearizableFactor1a {
+  /// The variable vertex for this factor's 0-th variable.
+  public var input0ID: TypedID<V0> { return edges.head }
+
+  // Implements the error as the scalar value of the 1D vector.
+  public func error(at x: Variables) -> Double {
+    return (errorVector(at: x) as! Vector1).x
+  }
+
+  // Forwarding implementation.
+  @differentiable
+  public func errorVector(at x: Variables) -> ErrorVector {
+    return errorVector(x.head)
+  }
+}
+
+
 // Artifact of Swift weakness.
 /// Do not use this. Use `Factor2` instead.
diff --git a/Sources/SwiftFusion/Inference/FactorsStorage.swift b/Sources/SwiftFusion/Inference/FactorsStorage.swift
index 174b72aa..fdd65b4f 100644
--- a/Sources/SwiftFusion/Inference/FactorsStorage.swift
+++ b/Sources/SwiftFusion/Inference/FactorsStorage.swift
@@ -62,7 +62,16 @@ extension ArrayStorage where Element: VectorFactor {
       let (lFactor, lVars) = factor.linearizableComponent(at: vars)
       let gradIndices = LVariables.linearized(lFactor.edges)
       let grads = GradVariables(at: gradIndices, in: GradVariables.withoutMutation(gradBufs))
-      let newGrads = grads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm }
+
+      var newGrads = grads
+      if lFactor.errorVector(at: lVars) is Vector3 {
+        newGrads = newGrads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm }
+      }
+      else {
+        var currGrads = gradient(at: lVars) { (lFactor.errorVector(at: $0) as! Vector1 + Vector1(1000.0)).squaredNorm } as! PenguinStructures.Tuple<Vector1, Empty>
+        currGrads.head.x = currGrads.head.x / 100.0
+        newGrads = newGrads + (currGrads as! Element.LinearizableComponent.Variables.TangentVector)
+      }
       newGrads.assign(into: gradIndices, in: gradBufs)
   }
 }
diff --git a/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift b/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift
index f98b799b..fdf8a89a 100644
--- a/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift
+++ b/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift
@@ -2,14 +2,24 @@ import PenguinParallel
 import PenguinStructures
 import TensorFlow

-public protocol AppearanceModelEncoder {
-  associatedtype HyperParameters
-  init(from imageBatch: Tensor<Double>, given: HyperParameters?)
+// Same as Encoder. To be used only for an end-to-end classifier
+public protocol Classifier {
+  @differentiable
+  func classify(_ imageBatch: Tensor<Double>) -> Tensor<Double>
+}
+
+// To be used as an encoder.
+public protocol Encoder {
   @differentiable
   func encode(_ imageBatch: Tensor<Double>) -> Tensor<Double>
 }
+
+public protocol AppearanceModelEncoder: Encoder {
+  associatedtype HyperParameters
+  init(from imageBatch: Tensor<Double>, given: HyperParameters?)
+}
+
 public extension AppearanceModelEncoder {
   /// Extension allows to have a default nil parameter
   init(from imageBatch: Tensor<Double>) {
diff --git a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift
index 585948e4..b5f4ae49 100644
--- a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift
+++ b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift
@@ -88,3 +88,48 @@ public struct ProbablisticTrackingFactor<
     return Vector1(result)
   }
 }
+
+
+public struct ProbablisticTrackingFactor2<
+  MyClassifier: Classifier
+>: LinearizableFactor1a {
+  public typealias V0 = Pose2
+
+  public let edges: Variables.Indices
+
+  public let measurement: ArrayImage
+
+  public let classifier: MyClassifier
+
+  public var patchSize: (Int, Int)
+
+  public var appearanceModelSize: (Int, Int)
+
+  public init(
+    _ poseId: TypedID<Pose2>,
+    measurement: Tensor<Float>,
+    classifier: MyClassifier,
+    patchSize: (Int, Int),
+    appearanceModelSize: (Int, Int)
+  ) {
+    self.edges = Tuple1(poseId)
+    self.measurement = ArrayImage(measurement)
+    self.classifier = classifier
+    self.patchSize = patchSize
+    self.appearanceModelSize = appearanceModelSize
+  }
+
+  @differentiable
+  public func errorVector(_ pose: Pose2) -> Vector1 {
+    let region = OrientedBoundingBox(center: pose, rows: patchSize.0, cols: patchSize.1)
+    let patch = Tensor<Double>(measurement.patch(at: region, outputSize: appearanceModelSize).tensor)
+    let output = classifier.classify(patch.expandingShape(at: 0)).squeezingShape(at: 0)
+
+    let sm = softmax(output)
+    let loglikelihood = -log(sm[1]) + log(sm[0])
+
+    let result = loglikelihood.scalarized()
+    return Vector1(result)
+  }
+}
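A quick numeric check (illustration only, not part of the diff) of the error that ProbablisticTrackingFactor2 minimizes: for two-class logits where index 1 is the target, -log(sm[1]) + log(sm[0]) reduces to the logit difference, so a pose the classifier scores as target yields a negative error.

import TensorFlow

let logits = Tensor<Double>([-1.0, 2.0])  // [background, target] scores at a candidate pose
let sm = softmax(logits)
let error = -log(sm[1]) + log(sm[0])      // same expression as in errorVector
print(error.scalarized())                 // -3.0, i.e. logits[0] - logits[1]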
diff --git a/Sources/SwiftFusion/Optimizers/GradientDescent.swift b/Sources/SwiftFusion/Optimizers/GradientDescent.swift
index 17f3353b..f46582a2 100644
--- a/Sources/SwiftFusion/Optimizers/GradientDescent.swift
+++ b/Sources/SwiftFusion/Optimizers/GradientDescent.swift
@@ -17,15 +17,72 @@ import _Differentiation
 public struct GradientDescent {
   /// The fraction of the gradient to move per step.
   public var learningRate: Double
+  public var baseLearningRate: Double

   /// Creates an instance with the given `learningRate`.
   public init(learningRate: Double) {
     self.learningRate = learningRate
+    self.baseLearningRate = learningRate
   }
-
+  /// Get the learning rate schedule based on the dataset size
+  ///
+  /// - Parameters:
+  ///   - datasetSize: number of images in the current dataset
+  /// - Returns: learning rate schedule based on the current dataset
+  func getSchedule(datasetSize: Int) -> Array<Int> {
+    if datasetSize == 100 {
+      return [3, 6, 10, 100]
+    }
+    if datasetSize < 20000 {
+      return [100, 200, 300, 400, 500]
+    }
+    else if datasetSize < 500000 {
+      return [500, 3000, 6000, 9000, 10000]
+    }
+    else {
+      return [500, 6000, 12000, 18000, 20000]
+    }
+  }
+  /// Get learning rate at the current step given the dataset size and base learning rate
+  ///
+  /// - Parameters:
+  ///   - step: current training step
+  ///   - datasetSize: number of images in the dataset
+  ///   - baseLearningRate: starting learning rate to modify
+  /// - Returns: learning rate at the current step in training
+  func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? {
+    let supports = getSchedule(datasetSize: datasetSize)
+    // Linear warmup
+    if step < supports[0] {
+      return baseLearningRate * Float(step) / Float(supports[0])
+    }
+    // End of training
+    else if step >= supports.last! {
+      return nil
+    }
+    // Staircase decays by factor of 10
+    else {
+      var baseLearningRate = baseLearningRate
+      for s in supports[1...] {
+        if s < step {
+          baseLearningRate = baseLearningRate / 10.0
+        }
+      }
+      return baseLearningRate
+    }
+  }
   /// Moves `values` along the gradient of `objective`'s error function for a single gradient
   /// descent step.
   public func update(_ values: inout VariableAssignments, objective: FactorGraph) {
     values.move(along: -learningRate * objective.errorGradient(at: values))
   }
 }
+
+extension GradientDescent: Optimizer {
+  public mutating func optimize(graph: FactorGraph, initial: inout VariableAssignments) {
+    for i in 0..<15 {
+      self.learningRate = Double(getLearningRate(step: i + 1, datasetSize: 100, baseLearningRate: Float(self.baseLearningRate))!)
+      self.update(&initial, objective: graph)
+    }
+  }
+}
\ No newline at end of file
diff --git a/Sources/SwiftFusion/Optimizers/LM.swift b/Sources/SwiftFusion/Optimizers/LM.swift
index 76a66193..7beed981 100644
--- a/Sources/SwiftFusion/Optimizers/LM.swift
+++ b/Sources/SwiftFusion/Optimizers/LM.swift
@@ -82,8 +82,8 @@ public struct LM {
     var inner_iter_step = 0
     var inner_success = false
     var all_done = false
-    for _ in 0..<max_iteration {
diff --git a/Sources/SwiftFusion/Optimizers/Optimizer.swift b/Sources/SwiftFusion/Optimizers/Optimizer.swift
new file mode 100644
--- /dev/null
+++ b/Sources/SwiftFusion/Optimizers/Optimizer.swift
+public protocol Optimizer {
+  mutating func optimize(graph: FactorGraph, initial: inout VariableAssignments) -> ()
+
+}
\ No newline at end of file
diff --git a/Sources/SwiftFusion/Probability/MultivariateGaussian.swift b/Sources/SwiftFusion/Probability/MultivariateGaussian.swift
index 7e96aa4f..33885a73 100644
--- a/Sources/SwiftFusion/Probability/MultivariateGaussian.swift
+++ b/Sources/SwiftFusion/Probability/MultivariateGaussian.swift
@@ -78,7 +78,7 @@ public struct MultivariateGaussian: GenerativeDensity {
     return t.scalarized() / 2.0
   }
-
+  /// Calculates the normalized probability
   @differentiable
   public func probability(_ sample: T) -> Double {
     // - ToDo: Precalculate constant
diff --git a/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift b/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift
index 2ec763ed..de38d895 100644
--- a/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift
+++ b/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift
@@ -17,7 +17,7 @@ import SwiftFusion
 import XCTest

 final class GradientDescentTests: XCTestCase {
-  /// Test convergence for a simple Pose2SLAM graph.
+  // Test convergence for a simple Pose2SLAM graph.
   func testPose2SLAM() {
     var x = VariableAssignments()
     let pose1ID = x.store(Pose2(Rot2(0.2), Vector2(0.5, 0.0)))
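To make the new schedule concrete: on the `datasetSize: 100` path that `optimize` hard-codes, the supports are [3, 6, 10, 100], so the 15 steps warm up linearly for steps 1-2 and then decay by 10x once past supports 6 and 10. A sketch (assumes module-internal access, since `getLearningRate` is not public):

let gd = GradientDescent(learningRate: 1e-3)
for step in 1...15 {
  // steps 1-2: warmup (3.3e-4, 6.7e-4); steps 3-6: 1e-3; 7-10: 1e-4; 11-15: 1e-5
  if let lr = gd.getLearningRate(step: step, datasetSize: 100, baseLearningRate: 1e-3) {
    print("step \(step): learning rate \(lr)")
  }
}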