diff --git a/Examples/OISTVisualizationTool/main.swift b/Examples/OISTVisualizationTool/main.swift
index fab3a70c..51a9a826 100644
--- a/Examples/OISTVisualizationTool/main.swift
+++ b/Examples/OISTVisualizationTool/main.swift
@@ -44,9 +44,9 @@ struct ViewFrame: ParsableCommand {
let image = dataset.loadFrame(frameRawId)!
- plot(image, boxes: dataset.labels[frameId].enumerated().map {
- (String($0), $1.location)
- }, margin: 10.0, scale: 0.5).show()
+ // plot(image, boxes: dataset.labels[frameId].enumerated().map {
+ // (String($0), $1.location)
+ // }, margin: 10.0, scale: 0.5).show()
}
}
@@ -118,7 +118,7 @@ struct RawTrack: ParsableCommand {
var tracker = makeRawPixelTracker(frames: videos, target: startPatch)
- if verbose { tracker.optimizer.verbosity = .SUMMARY }
+    // For LM Optimizer
+    // if verbose { tracker.optimizer.verbosity = .SUMMARY }
let prediction = tracker.infer(knownStart: Tuple1(startPose))
@@ -150,9 +150,9 @@ struct RawTrack: ParsableCommand {
print("Creating output plot")
}
startTimer("PLOTTING")
- plot(image, boxes: bboxes.indices.map {
- ("\($0)", bboxes[$0])
- }, margin: 10.0, scale: 0.5).show()
+ // plot(image, boxes: bboxes.indices.map {
+ // ("\($0)", bboxes[$0])
+ // }, margin: 10.0, scale: 0.5).show()
stopTimer("PLOTTING")
if verbose {
@@ -207,11 +207,11 @@ struct PpcaTrack: ParsableCommand {
startTimer("MAKE_GRAPH")
var tracker = makePPCATracker(model: ppca, statistics: statistics, frames: videos, targetSize: (40, 70))
stopTimer("MAKE_GRAPH")
-    if verbose { tracker.optimizer.verbosity = .SUMMARY }
-
-    tracker.optimizer.cgls_precision = 1e-6
-    tracker.optimizer.precision = 1e-2
+    // For LM Optimizer
+    // if verbose { tracker.optimizer.verbosity = .SUMMARY }
+    // tracker.optimizer.cgls_precision = 1e-6
+    // tracker.optimizer.precision = 1e-2
startTimer("GRAPH_INFER")
let prediction = tracker.infer(knownStart: Tuple2(startPose, Vector10(flatTensor: startLatent)))
@@ -255,9 +255,9 @@ struct PpcaTrack: ParsableCommand {
print("Creating output plot")
}
startTimer("PLOTTING")
- plot(image, boxes: bboxes.indices.map {
- ("\($0)", bboxes[$0])
- }, margin: 10.0, scale: 0.5).show()
+ // plot(image, boxes: bboxes.indices.map {
+ // ("\($0)", bboxes[$0])
+ // }, margin: 10.0, scale: 0.5).show()
stopTimer("PLOTTING")
if verbose {
@@ -358,11 +358,12 @@ struct NaiveRae: ParsableCommand {
stopTimer("MAKE_GRAPH")
if verbose { print("Starting Optimization...") }
- if verbose { tracker.optimizer.verbosity = .SUMMARY }
+ // For LM Optimizer
+ // if verbose { tracker.optimizer.verbosity = .SUMMARY }
- tracker.optimizer.cgls_precision = 1e-7
- tracker.optimizer.precision = 1e-4
- tracker.optimizer.max_iteration = 200
+ // tracker.optimizer.cgls_precision = 1e-7
+ // tracker.optimizer.precision = 1e-4
+ // tracker.optimizer.max_iteration = 200
startTimer("GRAPH_INFER")
let prediction = tracker.infer(knownStart: Tuple1(startPose))
@@ -402,9 +403,9 @@ struct NaiveRae: ParsableCommand {
print("Creating output plot")
}
startTimer("PLOTTING")
- plot(image, boxes: bboxes.indices.map {
- ("\($0)", bboxes[$0])
- }, margin: 10.0, scale: 0.5).show()
+ // plot(image, boxes: bboxes.indices.map {
+ // ("\($0)", bboxes[$0])
+ // }, margin: 10.0, scale: 0.5).show()
stopTimer("PLOTTING")
if verbose {
@@ -578,11 +579,12 @@ struct NaivePca: ParsableCommand {
stopTimer("MAKE_GRAPH")
if verbose { print("Starting Optimization...") }
- if verbose { tracker.optimizer.verbosity = .SUMMARY }
+ // For LM Optimizer
+ // if verbose { tracker.optimizer.verbosity = .SUMMARY }
- tracker.optimizer.cgls_precision = 1e-7
- tracker.optimizer.precision = 1e-4
- tracker.optimizer.max_iteration = 200
+ // tracker.optimizer.cgls_precision = 1e-7
+ // tracker.optimizer.precision = 1e-4
+ // tracker.optimizer.max_iteration = 200
startTimer("GRAPH_INFER")
let prediction = tracker.infer(knownStart: Tuple1(startPose))
@@ -622,9 +624,9 @@ struct NaivePca: ParsableCommand {
print("Creating output plot")
}
startTimer("PLOTTING")
- plot(image, boxes: bboxes.indices.map {
- ("\($0)", bboxes[$0])
- }, margin: 10.0, scale: 0.5).show()
+ // plot(image, boxes: bboxes.indices.map {
+ // ("\($0)", bboxes[$0])
+ // }, margin: 10.0, scale: 0.5).show()
stopTimer("PLOTTING")
if verbose {
diff --git a/Package.resolved b/Package.resolved
index 03e8fe88..b25ddd9e 100644
--- a/Package.resolved
+++ b/Package.resolved
@@ -1,15 +1,6 @@
{
"object": {
"pins": [
- {
- "package": "CSV.swift",
- "repositoryURL": "https://github.com/yaslab/CSV.swift.git",
- "state": {
- "branch": null,
- "revision": "81d2874c51db364d7e1d71b0d99018a294c87ac1",
- "version": "2.4.3"
- }
- },
{
"package": "Penguin",
"repositoryURL": "https://github.com/saeta/penguin.git",
@@ -19,15 +10,6 @@
"version": null
}
},
- {
- "package": "Plotly",
- "repositoryURL": "https://github.com/vojtamolda/Plotly.swift",
- "state": {
- "branch": null,
- "revision": "6e80119ba37b913e5460459556e2bf58f02eba67",
- "version": "0.4.0"
- }
- },
{
"package": "swift-argument-parser",
"repositoryURL": "https://github.com/apple/swift-argument-parser.git",
@@ -46,24 +28,6 @@
"version": "0.1.0"
}
},
- {
- "package": "swift-models",
- "repositoryURL": "https://github.com/tensorflow/swift-models.git",
- "state": {
- "branch": null,
- "revision": "b2fc0325bf9d476bf2d7a4cd0a09d36486c506e4",
- "version": null
- }
- },
- {
- "package": "SwiftProtobuf",
- "repositoryURL": "https://github.com/apple/swift-protobuf.git",
- "state": {
- "branch": null,
- "revision": "da9a52be9cd36c63993291ce3f1b65dafcd1e826",
- "version": "1.14.0"
- }
- },
{
"package": "swift-tools-support-core",
"repositoryURL": "https://github.com/apple/swift-tools-support-core.git",
diff --git a/Package.swift b/Package.swift
index e7dc1074..762940f8 100644
--- a/Package.swift
+++ b/Package.swift
@@ -30,7 +30,6 @@ let package = Package(
.package(name: "TensorBoardX", url: "https://github.com/ProfFan/tensorboardx-s4tf.git", from: "0.1.3"),
.package(url: "https://github.com/apple/swift-tools-support-core.git", .branch("swift-5.2-branch")),
.package(url: "https://github.com/apple/swift-argument-parser.git", from: "0.3.0"),
- .package(name: "Plotly", url: "https://github.com/vojtamolda/Plotly.swift", from: "0.4.0"),
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
@@ -57,7 +56,6 @@ let package = Package(
name: "BeeDataset",
dependencies: [
"SwiftFusion",
- "Plotly",
"ModelSupport",
]),
.target(
@@ -86,7 +84,6 @@ let package = Package(
"BeeTracking",
.product(name: "PenguinParallelWithFoundation", package: "Penguin"),
"SwiftFusion",
- "Plotly",
.product(name: "ArgumentParser", package: "swift-argument-parser"),
],
path: "Examples/OISTVisualizationTool"),
@@ -97,7 +94,6 @@ let package = Package(
"BeeTracking",
.product(name: "PenguinParallelWithFoundation", package: "Penguin"),
"SwiftFusion",
- "Plotly",
.product(name: "ArgumentParser", package: "swift-argument-parser"),
],
path: "Scripts",
diff --git a/Scripts/Andrew01.swift b/Scripts/Andrew01.swift
index fdcf8297..051aabb1 100644
--- a/Scripts/Andrew01.swift
+++ b/Scripts/Andrew01.swift
@@ -11,14 +11,11 @@ import PenguinStructures
/// Andrew01: RAE Tracker
struct Andrew01: ParsableCommand {
- @Option(help: "Run on track number x")
- var trackId: Int = 0
-
@Option(help: "Run for number of frames")
var trackLength: Int = 80
@Option(help: "Size of feature space")
- var featureSize: Int = 5
+ var featureSize: Int = 256
@Option(help: "Pretrained weights")
var weightsFile: String?
@@ -27,10 +24,12 @@ struct Andrew01: ParsableCommand {
// Make sure you have a folder `Results/andrew01` before running
func run() {
let np = Python.import("numpy")
- let kHiddenDimension = 100
+ let kHiddenDimension = 512
let (imageHeight, imageWidth, imageChannels) =
(40, 70, 1)
+
+
var rae = DenseRAE(
imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
hiddenDimension: kHiddenDimension, latentDimension: featureSize
@@ -42,20 +41,25 @@ struct Andrew01: ParsableCommand {
rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true))
}
+ // let (imageHeight, imageWidth, imageChannels) =
+ // (40, 70, 1)
+
+ // let rp = RandomProjection(fromShape: TensorShape([imageHeight, imageWidth, imageChannels]), toFeatureSize: featureSize)
+
+ let trainingDatasetSize = 100
+
let dataDir = URL(fileURLWithPath: "./OIST_Data")
- let data = OISTBeeVideo(directory: dataDir, length: 100)!
- let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!
+ let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)!
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
let trackerEvaluation = TrackerEvaluationDataset(testData)
-
let evalTracker: Tracker = {frames, start in
- let trainingDatasetSize = 100
var tracker = trainProbabilisticTracker(
trainingData: data,
encoder: rae,
frames: frames,
boundingBoxSize: (40, 70),
- withFeatureSize: 5,
+ withFeatureSize: featureSize,
fgRandomFrameCount: trainingDatasetSize,
bgRandomFrameCount: trainingDatasetSize
)
@@ -64,32 +68,36 @@ struct Andrew01: ParsableCommand {
return track
}
-    var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: 5, deltaAnchor: 175, outputFile: "andrew01")
+    let plt = Python.import("matplotlib.pyplot")
+    let sequenceCount = 1
+    var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01")
-
-
- for (index, value) in results.sequences.prefix(5).enumerated() {
+ for (index, value) in results.sequences.prefix(sequenceCount).enumerated() {
var i: Int = 0
zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map {
- let (fig, axes) = plotPatchWithGT(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center)
+ let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
fig.savefig("Results/andrew01/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight")
+ plt.close("all")
i = i + 1
}
- let plt = Python.import("matplotlib.pyplot")
+
+
let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).")
value.subsequences.map {
- plotTrajectory(
- track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0],
- withTrackColors: plt.cm.jet, withGtColors: plt.cm.gray
+ plotPoseDifference(
+ track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0]
)
}
plotOverlap(
metrics: value.subsequences.first!.metrics, on: axes[1]
)
- fig.savefig("Results/andrew01/andrew01_subsequence\(index).pdf", bbox_inches: "tight")
+ fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight")
+ print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)")
}
+
+ print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)")
@@ -103,4 +111,4 @@ fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
/// Returns `t` as a Swift tuple.
fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
return (t.head)
-}
\ No newline at end of file
+}
diff --git a/Scripts/Andrew05.swift b/Scripts/Andrew05.swift
new file mode 100644
index 00000000..cedd68b5
--- /dev/null
+++ b/Scripts/Andrew05.swift
@@ -0,0 +1,120 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Andrew05: SiamMask vs. RAE tracker comparison
+struct Andrew05: ParsableCommand {
+
+ @Option(help: "Run for number of frames")
+ var trackLength: Int = 80
+
+ @Option(help: "Size of feature space")
+ var featureSize: Int = 256
+
+ @Option(help: "Pretrained weights")
+ var weightsFile: String?
+
+
+  // Compare SiamMask and RAE tracks replayed from saved predictions
+ func run() {
+ let np = Python.import("numpy")
+ let plt = Python.import("matplotlib.pyplot")
+ let pickle = Python.import("pickle")
+
+ let trainingDatasetSize = 100
+
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)!
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+ let testData2 = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+
+ let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+ var i = 0
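+  // These closures do not run a live tracker: they replay predictions saved to JSON
+  // by earlier SiamMask and RAE runs, one file per evaluated sequence (indexed by `i`).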
+ let evalTrackerSiam: Tracker = {frames, start in
+ let decoder = JSONDecoder()
+ let trackPath = "Results/brando03/prediction_siammask_sequence_\(i).json"
+ let decodedTrack = try! decoder.decode([OrientedBoundingBox].self, from: Data(contentsOf: URL(fileURLWithPath: trackPath)))
+ i = i + 1
+ return decodedTrack
+ }
+ let sequenceCount = 19
+
+ var results_siam = trackerEvaluation.evaluate(evalTrackerSiam, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew02")
+ i = 0
+ let trackerEvaluation2 = TrackerEvaluationDataset(testData2)
+ let evalTrackerRae: Tracker = {frames, start in
+ let decoder = JSONDecoder()
+ let trackPath = "rae_256_updated_preds/prediction_rae_256_sequence_\(i).json"
+ let decodedTrack = try! decoder.decode([OrientedBoundingBox].self, from: Data(contentsOf: URL(fileURLWithPath: trackPath)))
+ i = i + 1
+ return decodedTrack
+ }
+ var results_rae = trackerEvaluation2.evaluate(evalTrackerRae, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01")
+
+
+
+
+ // for (index, value) in results_rae.sequences.prefix(sequenceCount).enumerated() {
+    for j in 0..<sequenceCount {
+ let value_rae = results_rae.sequences.prefix(sequenceCount)[j]
+ let index = j
+ let value_siam = results_siam.sequences.prefix(sequenceCount)[j]
+ let value = value_rae
+ var i: Int = 0
+ // zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map {
+ // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
+ // fig.savefig("Results/brando03/sequence\(index)/brando03\(i).png", bbox_inches: "tight")
+ // plt.close("all")
+ // i = i + 1
+ // }
+ zip(value_rae.subsequences.first!.frames, zip(zip(value_rae.subsequences.first!.prediction,value_siam.subsequences.first!.prediction), value_rae.subsequences.first!.groundTruth)).map {
+ // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
+ let (fig, _) = plotFrameWithPatches2(frame: $0.0, actual_box1: $0.1.0.0, actual_box2: $0.1.0.1, expected: $0.1.1.center, firstGroundTruth: value_rae.subsequences.first!.groundTruth.first!.center)
+ fig.savefig("Results/andrew01/sequence\(index)/comparison_\(i).png", bbox_inches: "tight")
+ plt.close("all")
+ i = i + 1
+ }
+
+ // let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
+ // fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value_rae.subsequences.first!.metrics.accuracy)) and Robustness \(value_rae.subsequences.first!.metrics.robustness).")
+ // print("First Ground Truth")
+ // value_rae.subsequences.map {
+ // print($0.prediction.first!)
+ // $0.prediction.map{print("\(round($0.center.t.x)) \(round($0.center.t.y)) \($0.center.rot.theta) \(40) \(70)")}
+
+ // plotPoseDifference(
+ // track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0]
+ // )
+ // }
+ // plotOverlap(
+ // metrics: value_rae.subsequences.first!.metrics, on: axes[1]
+ // )
+ // fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight")
+ print("Accuracy for sequence is \(value_rae.sequenceMetrics.accuracy) with Robustness of \(value_rae.sequenceMetrics.robustness)")
+ }
+
+ // print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)")
+ // let f = Python.open("Results/EAO/rae_em_\(featureSize).data", "wb")
+ // pickle.dump(results.expectedAverageOverlap.curve, f)
+
+
+ }
+
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+ return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+ return (t.head)
+}
\ No newline at end of file
diff --git a/Scripts/Andrew06.swift b/Scripts/Andrew06.swift
new file mode 100644
index 00000000..337d4d4e
--- /dev/null
+++ b/Scripts/Andrew06.swift
@@ -0,0 +1,362 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import ModelSupport
+
+import PenguinStructures
+
+let tf = Python.import("tensorflow")
+let np = Python.import("numpy")
+let pickle = Python.import("pickle")
+
+// Optional to enable GPU training
+// let _ = _ExecutionContext.global
+// let device = Device.defaultXLA
+let device = Device.default
+let modelName = "BiT-M-R50x1"
+var knownModels = [String: String]()
+let knownDatasetSizes: [String: (Int, Int)] = [
+ "bee_dataset": (40, 70)
+]
+
+public struct LabeledData<Data, Label> {
+ /// The `data` of our sample (usually used as input for a model).
+ public let data: Data
+ /// The `label` of our sample (usually used as target for a model).
+ public let label: Label
+
+ /// Creates an instance from `data` and `label`.
+ public init(data: Data, label: Label) {
+ self.data = data
+ self.label = label
+ }
+}
+
+// Script to train and track with Big Transfer
+struct Andrew06: ParsableCommand {
+ /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the
+ /// knownDatasetSizes dictionary
+ enum DatasetNotFoundError: Error {
+ case invalidInput(String)
+ }
+
+  /// Return relevant ResNet enumerated type based on weights loaded
+ ///
+ /// - Parameters:
+ /// - modelName: the name of the model pulled from the big transfer repository
+ /// to grab the enumerated type for
+ /// - Returns: ResNet enumerated type for BigTransfer model
+ func getModelUnits(modelName: String) -> BigTransfer.Depth {
+ if modelName.contains("R50") {
+ return .resNet50
+ }
+ else if modelName.contains("R101") {
+ return .resNet101
+ }
+ else {
+ return .resNet152
+ }
+ }
+
+ /// Get updated image resolution based on the specifications in BiT-Hyperrule
+ ///
+ /// - Parameters:
+ /// - originalResolution: the source resolution for the current image dataset
+ /// - Returns: new resolution for images based on BiT-Hyperrule
+ func getResolution(originalResolution: (Int, Int)) -> (Int, Int) {
+ let area = originalResolution.0 * originalResolution.1
+ return area < 96*96 ? (160, 128) : (512, 480)
+ }
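+  // e.g. the 40x70 bee patches have area 2800 < 96*96 = 9216, so BiT-Hyperrule
+  // resizes them to (160, 128).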
+
+ /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary
+ ///
+ /// - Parameters:
+ /// - datasetName: name of the current dataset you are using
+ /// - Returns: new resolution for specified dataset
+ /// - Throws:
+ /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary
+ func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) {
+ if let resolution = knownDatasetSizes[datasetName] {
+ return getResolution(originalResolution: resolution)
+ }
+ print("Unsupported dataset " + datasetName + ". Add your own here :)")
+ throw DatasetNotFoundError.invalidInput(datasetName)
+
+ }
+
+ /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes
+ ///
+ /// - Parameters:
+ /// - datasetSize: number of images in the current dataset
+ /// - Returns: mixup alpha based on number of images
+ func getMixUp(datasetSize: Int) -> Double {
+ return datasetSize < 20000 ? 0.0 : 0.1
+ }
+
+ /// Get the learning rate schedule based on the dataset size
+ ///
+ /// - Parameters:
+ /// - datasetSize: number of images in the current dataset
+ /// - Returns: learning rate schedule based on the current dataset
+  func getSchedule(datasetSize: Int) -> Array<Int> {
+    if datasetSize < 20000 {
+ return [100, 200, 300, 400, 500]
+ }
+ else if datasetSize < 500000 {
+ return [500, 3000, 6000, 9000, 10000]
+ }
+ else {
+ return [500, 6000, 12000, 18000, 20000]
+ }
+ }
+
+ /// Get learning rate at the current step given the dataset size and base learning rate
+ ///
+ /// - Parameters:
+ /// - step: current training step
+ /// - datasetSize: number of images in the dataset
+ /// - baseLearningRate: starting learning rate to modify
+ /// - Returns: learning rate at the current step in training
+ func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? {
+ let supports = getSchedule(datasetSize: datasetSize)
+ // Linear warmup
+ if step < supports[0] {
+ return baseLearningRate * Float(step) / Float(supports[0])
+ }
+ // End of training
+ else if step >= supports.last! {
+ return nil
+ }
+ // Staircase decays by factor of 10
+ else {
+ var baseLearningRate = baseLearningRate
+ for s in supports[1...] {
+ if s < step {
+ baseLearningRate = baseLearningRate / 10.0
+ }
+ }
+ return baseLearningRate
+ }
+ }
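+  // Worked example: dataCount = 40000 gives supports = [500, 3000, 6000, 9000, 10000],
+  // so step 250 warms up to 0.003 * 250/500 = 0.0015, step 4000 has passed the 3000
+  // support and decays to 0.003 / 10 = 0.0003, and any step >= 10000 returns nil.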
+  public typealias Datum = (patch: Tensor<Float>, label: Tensor<Int32>)
+  public typealias LabeledImage = LabeledData<Tensor<Float>, Tensor<Int32>>
+  public typealias Batches = Slices<Sampling<[(patch: Tensor<Float>, label: Tensor<Int32>)], ArraySlice<Int>>>
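+  // `Batches` is the per-epoch collection of minibatch slices produced by
+  // TrainingEpochs in run(); makeBatch turns each slice into a LabeledImage.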
+
+ func getTrainingDataBigTransfer(
+ from dataset: OISTBeeVideo,
+ numberForeground: Int = 10000,
+ numberBackground: Int = 10000
+ ) -> [Datum] {
+ let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map {
+      (patch: Tensor<Float>($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor<Int32>(0))
+ }
+ let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
+      (patch: Tensor<Float>($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor<Int32>(1))
+ }
+
+    let boxes = fgBoxes + bgBoxes
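+    // The OIST patches are single-channel; BiT expects RGB input, so the grayscale
+    // channel is stacked three times along the channel axis.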
+ return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)}
+ }
+
+  /// Stores training statistics for the BigTransfer training process, which differ from
+  /// the usual ones because mixed-up labels must be accounted for in the statistics.
+ struct BigTransferTrainingStatistics {
+    var correctGuessCount = Tensor<Int32>(0, on: Device.default)
+    var totalGuessCount = Tensor<Int32>(0, on: Device.default)
+    var totalLoss = Tensor<Float>(0, on: Device.default)
+ var batches: Int = 0
+ var accuracy: Float {
+ Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100
+ }
+ var averageLoss: Float { totalLoss.scalarized() / Float(batches) }
+
+ init(on device: Device = Device.default) {
+      correctGuessCount = Tensor<Int32>(0, on: device)
+      totalGuessCount = Tensor<Int32>(0, on: device)
+      totalLoss = Tensor<Float>(0, on: device)
+ }
+
+    mutating func update(logits: Tensor<Float>, labels: Tensor<Float>, loss: Tensor<Float>) {
+      let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1)
+      correctGuessCount += Tensor<Int32>(correct).sum()
+ totalGuessCount += Int32(labels.shape[0])
+ totalLoss += loss
+ batches += 1
+ }
+ }
+
+  fileprivate func makeBatch<BatchSamples: Collection>(
+    samples: BatchSamples, device: Device
+  ) -> LabeledImage where BatchSamples.Element == (patch: Tensor<Float>, label: Tensor<Int32>) {
+    let labels = Tensor<Int32>(samples.map(\.label))
+    let imageTensor = Tensor<Float>(samples.map(\.patch))
+    return LabeledImage(data: imageTensor, label: labels)
+  }
+ // Train Big Transfer
+ func run() {
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+
+ let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)!
+ let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)!
+
+ let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 20000, numberBackground: 20000)
+ let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600)
+
+
+ let classCount = 2
+ var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName)
+ let dataCount = 40000
+
+ var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9)
+ optimizer = SGD(copying: optimizer, to: device)
+
+ print("Beginning training...")
+    let batchSize: Int = 16
+ var currStep: Int = 1
+ let lrSupports = getSchedule(datasetSize: dataCount)
+ let scheduleLength = lrSupports.last!
+ let stepsPerEpoch = dataCount / batchSize
+ let epochCount = scheduleLength / stepsPerEpoch
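+    // e.g. dataCount = 40000 and batchSize = 16 give stepsPerEpoch = 2500; with
+    // scheduleLength = 10000 the loop below runs epochCount = 4 epochs.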
+ let resizeSize = getResolution(originalResolution: (40, 70))
+
+ let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map {
+      (batches: Batches) -> LazyMapSequence<Batches, LabeledImage> in
+ return batches.lazy.map{ makeBatch(samples: $0, device: device) }
+ }
+
+ let validationData = validation.inBatches(of: batchSize).lazy.map {
+ makeBatch(samples: $0, device: device)
+ }
+
+ for (epoch, batches) in trainingData.prefix(epochCount).enumerated() {
+ let start = Date()
+ var trainStats = BigTransferTrainingStatistics(on: device)
+ var testStats = BigTransferTrainingStatistics(on: device)
+
+ Context.local.learningPhase = .training
+ for batch in batches {
+ if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) {
+ optimizer.learningRate = newLearningRate
+ currStep = currStep + 1
+ }
+ else {
+ continue
+ }
+
+ var (eagerImages, eagerLabels) = (batch.data, batch.label)
+ let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1))
+ let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray())
+        let newLabels = Tensor<Float>(Tensor<Int32>(oneHotAtIndices: eagerLabels, depth: classCount))
+
+        let images = Tensor(copying: Tensor<Float>(numpy: flipped.numpy())!, to: device)
+ let labels = Tensor(copying: newLabels, to: device)
+        let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor<Float> in
+ let ŷ = bitModel(images)
+ let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels)
+ trainStats.update(logits: ŷ, labels: labels, loss: loss)
+ return loss
+ }
+
+ optimizer.update(&bitModel, along: 𝛁model)
+
+ LazyTensorBarrier()
+ }
+
+ print("Checking validation statistics...")
+ Context.local.learningPhase = .inference
+ for batch in validationData {
+ var (eagerImages, eagerLabels) = (batch.data, batch.label)
+ let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1))
+        let newLabels = Tensor<Float>(Tensor<Int32>(oneHotAtIndices: eagerLabels, depth: classCount))
+ let images = Tensor(copying: resized, to: device)
+ let labels = Tensor(copying: newLabels, to: device)
+ let ŷ = bitModel(images)
+ let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels)
+ LazyTensorBarrier()
+ testStats.update(logits: ŷ, labels: labels, loss: loss)
+ }
+
+ print(
+ """
+ [Epoch \(epoch)] \
+ Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \
+ Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \
+ (\(String(format: "%.1f", trainStats.accuracy))%), \
+ Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \
+ Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \
+ (\(String(format: "%.1f", testStats.accuracy))%) \
+ seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start)))
+ """)
+ }
+
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!
+
+ let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+
+
+ let evalTracker: Tracker = {frames, start in
+ var tracker = makeProbabilisticTracker2(
+ model: bitModel,
+ frames: frames,
+ targetSize: (40, 70)
+ )
+ let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true)
+      let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols: 70) }
+ return track
+
+ }
+
+ let plt = Python.import("matplotlib.pyplot")
+ let sequenceCount = 19
+ var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01")
+
+ for (index, value) in results.sequences.prefix(sequenceCount).enumerated() {
+ var i: Int = 0
+ zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map {
+ let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
+ fig.savefig("Results/andrew01/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight")
+ plt.close("all")
+ i = i + 1
+ }
+
+
+ let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
+ fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).")
+
+ value.subsequences.map {
+ let encoder = JSONEncoder()
+ let data = try! encoder.encode($0.prediction)
+ FileManager.default.createFile(atPath: "prediction_bigtransfer_sequence_\(index).json", contents: data, attributes: nil)
+ plotPoseDifference(
+ track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0]
+ )
+ }
+ plotOverlap(
+ metrics: value.subsequences.first!.metrics, on: axes[1]
+ )
+ fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight")
+ print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)")
+ }
+
+ print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)")
+ let f = Python.open("Results/EAO/bigtransfer.data", "wb")
+ pickle.dump(results.expectedAverageOverlap.curve, f)
+
+
+ }
+
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+ return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+ return (t.head)
+}
\ No newline at end of file
diff --git a/Scripts/Andrew07.swift b/Scripts/Andrew07.swift
new file mode 100644
index 00000000..83eb3ddb
--- /dev/null
+++ b/Scripts/Andrew07.swift
@@ -0,0 +1,342 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import ModelSupport
+
+import PenguinStructures
+
+// Error gradient visualization script for Big Transfer
+struct Andrew07: ParsableCommand {
+ /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the
+ /// knownDatasetSizes dictionary
+ enum DatasetNotFoundError: Error {
+ case invalidInput(String)
+ }
+
+  /// Return relevant ResNet enumerated type based on weights loaded
+ ///
+ /// - Parameters:
+ /// - modelName: the name of the model pulled from the big transfer repository
+ /// to grab the enumerated type for
+ /// - Returns: ResNet enumerated type for BigTransfer model
+ func getModelUnits(modelName: String) -> BigTransfer.Depth {
+ if modelName.contains("R50") {
+ return .resNet50
+ }
+ else if modelName.contains("R101") {
+ return .resNet101
+ }
+ else {
+ return .resNet152
+ }
+ }
+
+ /// Get updated image resolution based on the specifications in BiT-Hyperrule
+ ///
+ /// - Parameters:
+ /// - originalResolution: the source resolution for the current image dataset
+ /// - Returns: new resolution for images based on BiT-Hyperrule
+ func getResolution(originalResolution: (Int, Int)) -> (Int, Int) {
+ let area = originalResolution.0 * originalResolution.1
+ return area < 96*96 ? (160, 128) : (512, 480)
+ }
+
+ /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary
+ ///
+ /// - Parameters:
+ /// - datasetName: name of the current dataset you are using
+ /// - Returns: new resolution for specified dataset
+ /// - Throws:
+ /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary
+ func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) {
+ if let resolution = knownDatasetSizes[datasetName] {
+ return getResolution(originalResolution: resolution)
+ }
+ print("Unsupported dataset " + datasetName + ". Add your own here :)")
+ throw DatasetNotFoundError.invalidInput(datasetName)
+
+ }
+
+ /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes
+ ///
+ /// - Parameters:
+ /// - datasetSize: number of images in the current dataset
+ /// - Returns: mixup alpha based on number of images
+ func getMixUp(datasetSize: Int) -> Double {
+ return datasetSize < 20000 ? 0.0 : 0.1
+ }
+
+ /// Get the learning rate schedule based on the dataset size
+ ///
+ /// - Parameters:
+ /// - datasetSize: number of images in the current dataset
+ /// - Returns: learning rate schedule based on the current dataset
+  func getSchedule(datasetSize: Int) -> Array<Int> {
+    if datasetSize < 20000 {
+ return [100, 200, 300, 400, 500]
+ }
+ else if datasetSize < 500000 {
+ return [500, 3000, 6000, 9000, 10000]
+ }
+ else {
+ return [500, 6000, 12000, 18000, 20000]
+ }
+ }
+
+ /// Get learning rate at the current step given the dataset size and base learning rate
+ ///
+ /// - Parameters:
+ /// - step: current training step
+ /// - datasetSize: number of images in the dataset
+ /// - baseLearningRate: starting learning rate to modify
+ /// - Returns: learning rate at the current step in training
+ func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? {
+ let supports = getSchedule(datasetSize: datasetSize)
+ // Linear warmup
+ if step < supports[0] {
+ return baseLearningRate * Float(step) / Float(supports[0])
+ }
+ // End of training
+ else if step >= supports.last! {
+ return nil
+ }
+ // Staircase decays by factor of 10
+ else {
+ var baseLearningRate = baseLearningRate
+ for s in supports[1...] {
+ if s < step {
+ baseLearningRate = baseLearningRate / 10.0
+ }
+ }
+ return baseLearningRate
+ }
+ }
+  public typealias Datum = (patch: Tensor<Float>, label: Tensor<Int32>)
+  public typealias LabeledImage = LabeledData<Tensor<Float>, Tensor<Int32>>
+  public typealias Batches = Slices<Sampling<[(patch: Tensor<Float>, label: Tensor<Int32>)], ArraySlice<Int>>>
+
+ func getTrainingDataBigTransfer(
+ from dataset: OISTBeeVideo,
+ numberForeground: Int = 10000,
+ numberBackground: Int = 10000
+ ) -> [Datum] {
+ let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map {
+      (patch: Tensor<Float>($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor<Int32>(0))
+ }
+ let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
+      (patch: Tensor<Float>($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor<Int32>(1))
+ }
+
+    let boxes = fgBoxes + bgBoxes
+ return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)}
+ }
+
+  /// Stores training statistics for the BigTransfer training process, which differ from
+  /// the usual ones because mixed-up labels must be accounted for in the statistics.
+ struct BigTransferTrainingStatistics {
+    var correctGuessCount = Tensor<Int32>(0, on: Device.default)
+    var totalGuessCount = Tensor<Int32>(0, on: Device.default)
+    var totalLoss = Tensor<Float>(0, on: Device.default)
+ var batches: Int = 0
+ var accuracy: Float {
+ Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100
+ }
+ var averageLoss: Float { totalLoss.scalarized() / Float(batches) }
+
+ init(on device: Device = Device.default) {
+      correctGuessCount = Tensor<Int32>(0, on: device)
+      totalGuessCount = Tensor<Int32>(0, on: device)
+      totalLoss = Tensor<Float>(0, on: device)
+ }
+
+    mutating func update(logits: Tensor<Float>, labels: Tensor<Float>, loss: Tensor<Float>) {
+      let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1)
+      correctGuessCount += Tensor<Int32>(correct).sum()
+ totalGuessCount += Int32(labels.shape[0])
+ totalLoss += loss
+ batches += 1
+ }
+ }
+
+  fileprivate func makeBatch<BatchSamples: Collection>(
+    samples: BatchSamples, device: Device
+  ) -> LabeledImage where BatchSamples.Element == (patch: Tensor<Float>, label: Tensor<Int32>) {
+    let labels = Tensor<Int32>(samples.map(\.label))
+    let imageTensor = Tensor<Float>(samples.map(\.patch))
+    return LabeledImage(data: imageTensor, label: labels)
+  }
+ // Train Big Transfer
+ func run() {
+ let plt = Python.import("matplotlib.pyplot")
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+
+ let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)!
+ let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)!
+
+ let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 3000, numberBackground: 3000)
+ let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600)
+
+
+ let classCount = 2
+ var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName)
+ let dataCount = 6000
+
+ var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9)
+ optimizer = SGD(copying: optimizer, to: device)
+
+ print("Beginning training...")
+    let batchSize: Int = 16
+ var currStep: Int = 1
+ let lrSupports = getSchedule(datasetSize: dataCount)
+ let scheduleLength = lrSupports.last!
+ let stepsPerEpoch = dataCount / batchSize
+ let epochCount = scheduleLength / stepsPerEpoch
+ let resizeSize = getResolution(originalResolution: (40, 70))
+
+ let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map {
+      (batches: Batches) -> LazyMapSequence<Batches, LabeledImage> in
+ return batches.lazy.map{ makeBatch(samples: $0, device: device) }
+ }
+
+ let validationData = validation.inBatches(of: batchSize).lazy.map {
+ makeBatch(samples: $0, device: device)
+ }
+
+ for (epoch, batches) in trainingData.prefix(epochCount).enumerated() {
+ let start = Date()
+ var trainStats = BigTransferTrainingStatistics(on: device)
+ var testStats = BigTransferTrainingStatistics(on: device)
+
+ Context.local.learningPhase = .training
+ for batch in batches {
+ if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) {
+ optimizer.learningRate = newLearningRate
+ currStep = currStep + 1
+ }
+ else {
+ continue
+ }
+
+ var (eagerImages, eagerLabels) = (batch.data, batch.label)
+ let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1))
+ //let cropped = tf.image.random_crop(resized.makeNumpyArray(), [batchSize, resizeSize.0, resizeSize.1, 3])
+ let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray())
+        let newLabels = Tensor<Float>(Tensor<Int32>(oneHotAtIndices: eagerLabels, depth: classCount))
+
+        let images = Tensor(copying: Tensor<Float>(numpy: flipped.numpy())!, to: device)
+ let labels = Tensor(copying: newLabels, to: device)
+        let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor<Float> in
+ let ŷ = bitModel(images)
+ let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels)
+ trainStats.update(logits: ŷ, labels: labels, loss: loss)
+ return loss
+ }
+
+ optimizer.update(&bitModel, along: 𝛁model)
+
+ LazyTensorBarrier()
+ }
+
+ print("Checking validation statistics...")
+ Context.local.learningPhase = .inference
+ for batch in validationData {
+ var (eagerImages, eagerLabels) = (batch.data, batch.label)
+ let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1))
+        let newLabels = Tensor<Float>(Tensor<Int32>(oneHotAtIndices: eagerLabels, depth: classCount))
+ let images = Tensor(copying: resized, to: device)
+ let labels = Tensor(copying: newLabels, to: device)
+ let ŷ = bitModel(images)
+ let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels)
+ LazyTensorBarrier()
+ testStats.update(logits: ŷ, labels: labels, loss: loss)
+ }
+
+ print(
+ """
+ [Epoch \(epoch)] \
+ Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \
+ Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \
+ (\(String(format: "%.1f", trainStats.accuracy))%), \
+ Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \
+ Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \
+ (\(String(format: "%.1f", testStats.accuracy))%) \
+ seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start)))
+ """)
+ }
+
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!
+
+ let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+ let frames = testData.frames
+ let firstTrack = testData.tracks[0]
+ let firstFrame = frames[0]
+ let firstObb = firstTrack.boxes[0]
+
+ let range = 100.0
+
+ let firstGroundTruth = firstObb.center
+ print("oBB coordinates", firstGroundTruth.t.x, firstGroundTruth.t.y)
+
+ let (fig, axs) = plt.subplots(1,2).tuple2
+ let fr = np.squeeze(firstFrame.makeNumpyArray())
+ axs[0].imshow(fr / 255.0, cmap: "gray")
+
+
+ axs[0].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2)
+ axs[0].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2)
+ axs[1].set_xlim(0, range)
+ axs[1].set_ylim(0, range)
+
+ let x = firstGroundTruth.t.x
+ let y = firstGroundTruth.t.y
+
+    var values = Tensor<Double>(zeros: [Int(range), Int(range)])
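+    // Error surface: evaluate the classifier factor on a (range x range) grid of
+    // integer pixel offsets around the ground-truth center, keeping the rotation fixed.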
+
+ for i in 0...Int(range)-1 {
+ for j in 0...Int(range)-1 {
+ let t = Vector2(x-range/2+Double(i), y-range/2+Double(j))
+ let p = Pose2(firstGroundTruth.rot, t)
+ var v = VariableAssignments()
+ let poseId = v.store(p)
+ let startpose = v[poseId]
+ var fg = FactorGraph()
+ let factorNNC = ProbablisticTrackingFactor2(poseId,
+ measurement: firstFrame,
+ classifier: bitModel,
+ patchSize: (40, 70),
+ appearanceModelSize: (40, 70)
+ )
+ fg.store(factorNNC)
+ values[i,j] = Tensor(factorNNC.errorVector(v[poseId]).x)
+ print(j)
+ print(i)
+ }
+ }
+    // Shift the error surface to be nonnegative, then scale to [0, 255] for display.
+    let min_val = values.min()
+    if min_val.scalarized() < 0 {
+      values = values - min_val
+    }
+    values = values / values.max() * 255
+ print(values[0...,0])
+ print(values.shape)
+ axs[1].imshow(values.makeNumpyArray())
+
+ fig.savefig("./Results/andrew01/vizual_NNC.png", bbox_inches: "tight")
+ }
+
+
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+ return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+ return (t.head)
+}
\ No newline at end of file
diff --git a/Scripts/Andrew08.swift b/Scripts/Andrew08.swift
new file mode 100644
index 00000000..d9c24018
--- /dev/null
+++ b/Scripts/Andrew08.swift
@@ -0,0 +1,439 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import ModelSupport
+
+import PenguinStructures
+
+
+struct Andrew08: ParsableCommand {
+ /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the
+ /// knownDatasetSizes dictionary
+ enum DatasetNotFoundError: Error {
+ case invalidInput(String)
+ }
+  func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID<Pose2>, FactorGraph) {
+ var v = VariableAssignments()
+ let poseId = v.store(p)
+ v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6))
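+    // Gaussian noise in the Pose2 tangent space (stddevs assumed to be in
+    // (rotation, x, y) order); the offsets below record how far we started from truth.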
+ let dx = v[poseId].t.x - p.t.x
+ let dy = v[poseId].t.y - p.t.y
+ let dtheta = v[poseId].rot.theta - p.rot.theta
+ let startpose = v[poseId]
+ let fg = FactorGraph()
+
+ return (dx, dy, dtheta, startpose, v, poseId, fg)
+ }
+
+ func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) {
+ var conv = true
+ var errors = [Double]()
+ var xs = [Double]()
+ var ys = [Double]()
+ var thetas = [Double]()
+ return (conv, errors, xs, ys, thetas)
+ }
+  /// Return relevant ResNet enumerated type based on weights loaded
+ ///
+ /// - Parameters:
+ /// - modelName: the name of the model pulled from the big transfer repository
+ /// to grab the enumerated type for
+ /// - Returns: ResNet enumerated type for BigTransfer model
+ func getModelUnits(modelName: String) -> BigTransfer.Depth {
+ if modelName.contains("R50") {
+ return .resNet50
+ }
+ else if modelName.contains("R101") {
+ return .resNet101
+ }
+ else {
+ return .resNet152
+ }
+ }
+
+ /// Get updated image resolution based on the specifications in BiT-Hyperrule
+ ///
+ /// - Parameters:
+ /// - originalResolution: the source resolution for the current image dataset
+ /// - Returns: new resolution for images based on BiT-Hyperrule
+ func getResolution(originalResolution: (Int, Int)) -> (Int, Int) {
+ let area = originalResolution.0 * originalResolution.1
+ return area < 96*96 ? (160, 128) : (512, 480)
+ }
+
+ /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary
+ ///
+ /// - Parameters:
+ /// - datasetName: name of the current dataset you are using
+ /// - Returns: new resolution for specified dataset
+ /// - Throws:
+ /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary
+ func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) {
+ if let resolution = knownDatasetSizes[datasetName] {
+ return getResolution(originalResolution: resolution)
+ }
+ print("Unsupported dataset " + datasetName + ". Add your own here :)")
+ throw DatasetNotFoundError.invalidInput(datasetName)
+
+ }
+
+ /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes
+ ///
+ /// - Parameters:
+ /// - datasetSize: number of images in the current dataset
+ /// - Returns: mixup alpha based on number of images
+ func getMixUp(datasetSize: Int) -> Double {
+ return datasetSize < 20000 ? 0.0 : 0.1
+ }
+
+ /// Get the learning rate schedule based on the dataset size
+ ///
+ /// - Parameters:
+ /// - datasetSize: number of images in the current dataset
+ /// - Returns: learning rate schedule based on the current dataset
+  func getSchedule(datasetSize: Int) -> Array<Int> {
+ if datasetSize == 100 {
+ return [25, 50, 75, 100]
+ }
+    if datasetSize < 20000 {
+ return [100, 200, 300, 400, 500]
+ }
+ else if datasetSize < 500000 {
+ return [500, 3000, 6000, 9000, 10000]
+ }
+ else {
+ return [500, 6000, 12000, 18000, 20000]
+ }
+ }
+
+ /// Get learning rate at the current step given the dataset size and base learning rate
+ ///
+ /// - Parameters:
+ /// - step: current training step
+ /// - datasetSize: number of images in the dataset
+ /// - baseLearningRate: starting learning rate to modify
+ /// - Returns: learning rate at the current step in training
+ func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? {
+ let supports = getSchedule(datasetSize: datasetSize)
+ // Linear warmup
+ print(step)
+ print(supports)
+ if step < supports[0] {
+ return baseLearningRate * Float(step) / Float(supports[0])
+ }
+ // End of training
+ else if step >= supports.last! {
+ return nil
+ }
+ // Staircase decays by factor of 10
+ else {
+ var baseLearningRate = baseLearningRate
+ for s in supports[1...] {
+ if s < step {
+ baseLearningRate = baseLearningRate / 10.0
+ }
+ }
+ return baseLearningRate
+ }
+ }
+  public typealias Datum = (patch: Tensor<Float>, label: Tensor<Int32>)
+  public typealias LabeledImage = LabeledData<Tensor<Float>, Tensor<Int32>>
+  public typealias Batches = Slices<Sampling<[(patch: Tensor<Float>, label: Tensor<Int32>)], ArraySlice<Int>>>
+
+ func getTrainingDataBigTransfer(
+ from dataset: OISTBeeVideo,
+ numberForeground: Int = 10000,
+ numberBackground: Int = 10000
+ ) -> [Datum] {
+ let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map {
+      (patch: Tensor<Float>($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor<Int32>(0))
+ }
+ let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
+      (patch: Tensor<Float>($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor<Int32>(1))
+ }
+
+    let boxes = fgBoxes + bgBoxes
+ return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)}
+ }
+
+  /// Stores training statistics for the BigTransfer training process, which differ from
+  /// the usual ones because mixed-up labels must be accounted for in the statistics.
+ struct BigTransferTrainingStatistics {
+    var correctGuessCount = Tensor<Int32>(0, on: Device.default)
+    var totalGuessCount = Tensor<Int32>(0, on: Device.default)
+    var totalLoss = Tensor<Float>(0, on: Device.default)
+ var batches: Int = 0
+ var accuracy: Float {
+ Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100
+ }
+ var averageLoss: Float { totalLoss.scalarized() / Float(batches) }
+
+ init(on device: Device = Device.default) {
+      correctGuessCount = Tensor<Int32>(0, on: device)
+      totalGuessCount = Tensor<Int32>(0, on: device)
+      totalLoss = Tensor<Float>(0, on: device)
+ }
+
+    mutating func update(logits: Tensor<Float>, labels: Tensor<Float>, loss: Tensor<Float>) {
+      let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1)
+      correctGuessCount += Tensor<Int32>(correct).sum()
+ totalGuessCount += Int32(labels.shape[0])
+ totalLoss += loss
+ batches += 1
+ }
+ }
+
+  fileprivate func makeBatch<BatchSamples: Collection>(
+    samples: BatchSamples, device: Device
+  ) -> LabeledImage where BatchSamples.Element == (patch: Tensor<Float>, label: Tensor<Int32>) {
+    let labels = Tensor<Int32>(samples.map(\.label))
+    let imageTensor = Tensor<Float>(samples.map(\.patch))
+    return LabeledImage(data: imageTensor, label: labels)
+  }
+ // Train Big Transfer
+ func run() {
+ let plt = Python.import("matplotlib.pyplot")
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+
+ let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)!
+ let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)!
+
+ let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 256, numberBackground: 256)
+ let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600)
+
+
+ let classCount = 2
+ var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName)
+ let dataCount = 6000
+
+ var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9)
+ optimizer = SGD(copying: optimizer, to: device)
+
+ print("Beginning training...")
+    let batchSize: Int = 16
+ var currStep: Int = 1
+ let lrSupports = getSchedule(datasetSize: dataCount)
+ let scheduleLength = lrSupports.last!
+ let stepsPerEpoch = dataCount / batchSize
+ let epochCount = scheduleLength / stepsPerEpoch
+ let resizeSize = getResolution(originalResolution: (40, 70))
+
+ let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map {
+      (batches: Batches) -> LazyMapSequence<Batches, LabeledImage> in
+ return batches.lazy.map{ makeBatch(samples: $0, device: device) }
+ }
+
+ let validationData = validation.inBatches(of: batchSize).lazy.map {
+ makeBatch(samples: $0, device: device)
+ }
+
+ for (epoch, batches) in trainingData.prefix(epochCount).enumerated() {
+ let start = Date()
+ var trainStats = BigTransferTrainingStatistics(on: device)
+ var testStats = BigTransferTrainingStatistics(on: device)
+
+ Context.local.learningPhase = .training
+ for batch in batches {
+ if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) {
+ optimizer.learningRate = newLearningRate
+ currStep = currStep + 1
+ }
+ else {
+ continue
+ }
+
+ var (eagerImages, eagerLabels) = (batch.data, batch.label)
+ let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1))
+ let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray())
+        let newLabels = Tensor<Float>(Tensor<Int32>(oneHotAtIndices: eagerLabels, depth: classCount))
+
+        let images = Tensor(copying: Tensor<Float>(numpy: flipped.numpy())!, to: device)
+ let labels = Tensor(copying: newLabels, to: device)
+        let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor<Float> in
+ let ŷ = bitModel(images)
+ let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels)
+ trainStats.update(logits: ŷ, labels: labels, loss: loss)
+ return loss
+ }
+
+ optimizer.update(&bitModel, along: 𝛁model)
+
+ LazyTensorBarrier()
+ }
+
+ print("Checking validation statistics...")
+ Context.local.learningPhase = .inference
+ for batch in validationData {
+ var (eagerImages, eagerLabels) = (batch.data, batch.label)
+ let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1))
+        let newLabels = Tensor<Float>(Tensor<Int32>(oneHotAtIndices: eagerLabels, depth: classCount))
+ let images = Tensor(copying: resized, to: device)
+ let labels = Tensor(copying: newLabels, to: device)
+ let ŷ = bitModel(images)
+ let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels)
+ LazyTensorBarrier()
+ testStats.update(logits: ŷ, labels: labels, loss: loss)
+ }
+
+ print(
+ """
+ [Epoch \(epoch)] \
+ Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \
+ Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \
+ (\(String(format: "%.1f", trainStats.accuracy))%), \
+ Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \
+ Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \
+ (\(String(format: "%.1f", testStats.accuracy))%) \
+ seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start)))
+ """)
+ }
+
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!
+
+ let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+ let frames = testData.frames
+ let firstTrack = testData.tracks[0]
+ let firstFrame = frames[0]
+ let firstObb = firstTrack.boxes[0]
+
+ let lr = 100.0
+ var GDOptimizer = GradientDescent(learningRate: lr)
+ let it_limit = 80
+
+
+ let folderName = "Results/GD_optimization_BiT_lr_\(lr)__10_22_2021_final_images_4subplots"
+ if !FileManager.default.fileExists(atPath: folderName) {
+ do {
+ try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+ } catch {
+ print(error.localizedDescription)
+ }
+ }
+
+ print("hello1")
+ let (fig, axs) = plt.subplots(2,2).tuple2
+ let fr = np.squeeze(firstFrame.makeNumpyArray())
+ for i in 0...1 {
+ for j in 0...1 {
+ axs[i,j].imshow(fr / 255.0, cmap: "gray")
+ let firstGroundTruth = firstObb.center
+ axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50)
+ axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50)
+ axs[i,j].get_xaxis().set_visible(false)
+ axs[i,j].get_yaxis().set_visible(false)
+ }
+ }
+ axs[0,0].set_title("fabs(theta) < 6 Degrees", fontsize:8)
+ axs[0,1].set_title("fabs(theta) < 12 Degrees", fontsize:8)
+ axs[1,0].set_title("fabs(theta) < 16 Degrees", fontsize:8)
+ axs[1,1].set_title("fabs(theta) >= 16 Degrees", fontsize:8)
+
+ print("hello")
+ let xy_thresh = 20.0 //pixels
+ let theta_thresh = 0.5 //radians // consider doing overlap.
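+    // A run counts as converged when the final pose lands within xy_thresh pixels and
+    // theta_thresh radians of the ground-truth box; the dots below are colored accordingly.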
+
+
+ // NN Params
+ let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+ let featureSize = 256
+ let kHiddenDimension = 512
+
+ var useClassifier = true
+ if useClassifier {
+ var classifier = bitModel
+ for j in 0...200 {
+ var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center)
+ let factorNNC = ProbablisticTrackingFactor2(poseId,
+ measurement: firstFrame,
+ classifier: classifier,
+ patchSize: (40, 70),
+ appearanceModelSize: (40, 70)
+ )
+ fg.store(factorNNC)
+
+
+ // PERFORM GRADIENT DESCENT
+ var (conv, errors, xs, ys, thetas) = initialize_empty_arrays()
+ print("starting optimization")
+      // Run gradient descent from the perturbed start, recording the error trajectory.
+      for _ in 0..<it_limit {
+        errors.append(factorNNC.errorVector(v[poseId]).x)
+        xs.append(v[poseId].t.x)
+        ys.append(v[poseId].t.y)
+        thetas.append(v[poseId].rot.theta)
+        GDOptimizer.update(&v, objective: fg)
+      }
+      let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh)
+ let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh)
+ let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh)
+ if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds {
+ if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 {
+ axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 5)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 {
+ axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 5)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 {
+ axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 5)
+ } else {
+ axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 5)
+ }
+
+ } else {
+ if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 {
+ axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 5)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 {
+ axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 5)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 {
+ axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 5)
+ } else {
+ axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 5)
+ }
+ }
+ let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas)
+ let final_err = factorNNC.errorVector(v[poseId]).x
+ let label_err = factorNNC.errorVector(firstObb.center).x
+ let start_err = factorNNC.errorVector(startpose).x
+
+ axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)"
+ + "\n label err = \(label_err)"
+ + "\n start err = \(start_err)"
+ + "\n learning rate = \(lr)"
+ + "\n converged = \(conv)")
+ figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight")
+ plt.close("all")
+ fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight")
+
+ }
+ }
+}
+
+
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+  return (t.head)
+}
\ No newline at end of file
diff --git a/Scripts/Brando01.swift b/Scripts/Brando01.swift
new file mode 100644
index 00000000..0a4b9340
--- /dev/null
+++ b/Scripts/Brando01.swift
@@ -0,0 +1,102 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando01: Tracker OpenCV
+struct Brando01: ParsableCommand {
+ func run() {
+
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let data = OISTBeeVideo(directory: dataDir, length: 100)!
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!
+ print("number of frames in training data:", data.labels.count)
+ print("number of frames in testing data", testData.labels.count, "\n\n")
+
+
+ let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+ let np = Python.import("numpy")
+ let cv2 = Python.import("cv2")
+
+
+ let evalTracker: Tracker = {frames, start in
+
+ let tracker = cv2.TrackerMIL_create()
+
+ var BB = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-35, 40, 70])
+ var smallframe = np.array(frames.first!.makeNumpyArray())
+ let leftpt = Python.tuple([Int(start.center.t.x)-35, Int(start.center.t.y)-35])
+ let rgtpt = Python.tuple([Int(start.center.t.x)+35, Int(start.center.t.y)+35])
+ cv2.rectangle(smallframe, leftpt, rgtpt, Python.tuple([0,150,0]), 5)
+ cv2.imwrite("./image_new.png", smallframe)
+ tracker[dynamicMember: "init"](frames.first!.makeNumpyArray(), BB)
+ var results = [PythonObject]()
+ for frame in frames {
+ // `update` returns a (success, box) pair; keep the box only on success.
+ let (track_success, newBB) = tracker[dynamicMember: "update"](frame.makeNumpyArray()).tuple2
+ if Bool(track_success)! {
+ results.append(newBB)
+ }
+ }
+ print("printing python BB")
+ var track = [OrientedBoundingBox]()
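+ // OpenCV boxes are (x, y, w, h) with (x, y) at the top-left corner, so the
+ // +20/+35 offsets recenter the 40x70 patch before building the OBB.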
+ for result in results {
+ let pythonBB = result.tuple4
+ let rows = Int(pythonBB.2)!
+ let cols = Int(pythonBB.3)!
+ let rot = Rot2(0)
+ let vect = Vector2(Double(pythonBB.0)!+20, Double(pythonBB.1)!+35)
+ let center = Pose2(rot, vect)
+ let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols)
+ track.append(swiftBB)
+ }
+ return track
+ }
+
+
+
+
+
+ let plt = Python.import("matplotlib.pyplot")
+ let sequenceCount = 1
+ let results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "brando01")
+ for (index, value) in results.sequences.prefix(1).enumerated() {
+ var i: Int = 0
+ zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map {
+ let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
+ fig.savefig("Results/brando01/sequence\(index)/brando01\(i).png", bbox_inches: "tight")
+ plt.close("all")
+ i = i + 1
+ }
+
+
+ let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
+ fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).")
+
+ value.subsequences.map {
+ plotPoseDifference(
+ track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0]
+ )
+ }
+ plotOverlap(
+ metrics: value.subsequences.first!.metrics, on: axes[1]
+ )
+ fig.savefig("Results/brando01/brando01_subsequence\(index).png", bbox_inches: "tight")
+ print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)")
+ }
+
+ print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)")
+
+ }
+}
\ No newline at end of file
diff --git a/Scripts/Brando02.swift b/Scripts/Brando02.swift
new file mode 100644
index 00000000..ad027cb2
--- /dev/null
+++ b/Scripts/Brando02.swift
@@ -0,0 +1,66 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando02 OpenCV tracker
+struct Brando02: ParsableCommand {
+ func run() {
+
+ let np = Python.import("numpy")
+ let cv2 = Python.import("cv2")
+ let os = Python.import("os")
+ let image_names = os.listdir("../OIST_Data/downsampled")
+ let track_names = os.listdir("../OIST_Data/tracks")
+ image_names.sort()
+ track_names.sort()
+ let track = track_names[10]
+ let frame = cv2.imread("../OIST_Data/downsampled/" + image_names[0])
+ let centers = Python.list()
+ let fs = Python.open("../OIST_Data/tracks/" + track, "r")
+ let lines = fs.readlines()
+ print(type(of: lines))
+ var i = 0
+ for line in lines {
+ if i == 0 {
+ i += 1
+ continue
+ }
+ i += 1
+ let lineSwift = String(line)
+ let lineSwift2 = lineSwift ?? ""
+ let nums = lineSwift2.components(separatedBy: " ")
+ let height = Float(nums[1])
+ let width = Float(nums[0])
+ centers.append(Python.tuple([Python.float(width),Python.float(height)]))
+ }
+
+
+ let width1 = Float(centers[0][0])
+ let height1 = Float(centers[0][1])
+ let width = width1 ?? 0
+ let height = height1 ?? 0
+ let BB = Python.tuple([Int(width-35),Int(height-35),70,70])
+ let tracker = cv2.TrackerMIL_create()
+ tracker[dynamicMember: "init"](frame, BB)
+ var results = [PythonObject]()
+ for image_name in image_names {
+ let framei = cv2.imread("../OIST_Data/downsampled/" + image_name)
+ var a = tracker[dynamicMember: "update"](framei).tuple2
+ let track_success = a.0
+ let newBB = a.1
+ if Bool(track_success)! {
+ results.append(newBB)
+ }
+ }
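+
+ // The boxes are collected but never consumed; as a minimal sanity check
+ // (an assumed addition), report how many frames tracked successfully.
+ print("MIL tracker succeeded on \(results.count) of \(Python.len(image_names)) frames")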
+
+
+ }
+
+}
\ No newline at end of file
diff --git a/Scripts/Brando03.swift b/Scripts/Brando03.swift
new file mode 100644
index 00000000..cc6b4f3b
--- /dev/null
+++ b/Scripts/Brando03.swift
@@ -0,0 +1,206 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando01 SiamMask Tracker
+struct Brando03: ParsableCommand {
+
+ func run() {
+
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!
+ print("number of frames in testing data", testData.labels.count, "\n\n")
+
+
+ let trackerEvaluation = TrackerEvaluationDataset(testData)
+ let os = Python.import("os")
+ let torch = Python.import("torch")
+
+ let np = Python.import("numpy")
+ let smtools = Python.import("SiamMask.tools")
+ let smutils = Python.import("SiamMask.utils")
+ let cfhelper = Python.import("SiamMask.utils.config_helper")
+ let ldhelper = Python.import("SiamMask.utils.load_helper")
+ let smtest = Python.import("SiamMask.tools.test")
+
+
+ let cv2 = Python.import("cv2")
+
+ let argparse = Python.import("argparse")
+ let parser = argparse.ArgumentParser()
+
+ parser.add_argument("--resume")
+ parser.add_argument("--config")
+ parser.add_argument("--base_path")
+ let args = parser.parse_args(["--resume", "../SiamMask/model_sharp/checkpoint_e20.pth", "--config", "../SiamMask/experiments/siammask_sharp/config_vot.json", "--base_path", "./OIST_Data/downsampled"])
+
+ print("ARGUMENTS", args)
+
+
+ print(Python.version)
+ print("hello")
+ let evalTracker: Tracker = { frames, start in
+
+ //SIAM MASK TRACKER IS HERE
+ let device = torch.device("cpu")
+ torch.backends.cudnn.benchmark = true
+
+ // # Setup Model
+ let cfg = cfhelper.load_config(args)
+ let custom = Python.import("SiamMask.experiments.siammask_sharp.custom")
+ var siammask = custom.Custom(anchors: cfg["anchors"])
+ siammask = ldhelper.load_pretrain(siammask, args.resume)
+
+ siammask.eval().to(device)
+ let init_rect = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-20, 40, 70])
+ let tup = init_rect.tuple4
+ let x = tup.0
+ let y = tup.1
+ let w = tup.2
+ let h = tup.3
+
+ var state: PythonObject = 0
+ var results = [PythonObject]()
+
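+ // Frame 0 initializes SiamMask with the starting box; every later frame
+ // runs one tracking step and records the predicted polygon.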
+ for (f, im) in frames.enumerated() {
+
+ let im_np = im.makeNumpyArray()
+ let im_3d = np.squeeze(np.stack(Python.tuple([im_np, im_np, im_np]), axis: 2))
+
+ if f == 0 { // init
+ let target_pos = np.array([x + w / 2, y + h / 2])
+ let target_sz = np.array([w, h])
+ state = smtest.siamese_init(im_3d, target_pos, target_sz, siammask, cfg["hp"], device: device) //# init tracker
+ results.append(Python.tuple([Int(x + w / 2)!, Int(y + h / 2)!]))
+ } else if f > 0 { //# tracking
+ state = smtest.siamese_track(state, im_3d, mask_enable: true, refine_enable: true, device: device) //# track
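+ // "ploygon" is the actual (misspelled) key used upstream by SiamMask.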
+ let location = state["ploygon"].flatten()
+
+
+ results.append(location)
+
+
+ }
+
+ }
+
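+ // Convert each SiamMask polygon to an OrientedBoundingBox: the center is
+ // the mean of the four corners, the shorter edge gives the box rows, and
+ // theta is recovered from the short-edge midpoint relative to the center.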
+ var track = [OrientedBoundingBox]()
+ for (i, result) in results.enumerated() {
+ if i > 0 {
+ let location = result
+ let centx = Int((location[0]+location[2]+location[4]+location[6])/4)!
+ let centy = Int((location[1]+location[3]+location[5]+location[7])/4)!
+ let dx1 = location[0]-location[2]
+ let dy1 = location[1]-location[3]
+ let dx2 = location[0]-location[6]
+ let dy2 = location[1]-location[7]
+ let dist1 = sqrt(pow(Double(dx1)!, 2) + pow(Double(dy1)!, 2))
+ let dist2 = (pow(Double(dx2)!, 2) + pow(Double(dy2)!, 2)).squareRoot()
+ let locx: Int
+ let locy: Int
+ let rows: Int
+ let cols: Int
+ if dist1 < dist2 {
+ locx = Int((location[0]+location[2])/2)!
+ locy = Int((location[1]+location[3])/2)!
+ rows = Int(dist1)
+ cols = Int(dist2)
+ } else {
+ locx = Int((location[0]+location[6])/2)!
+ locy = Int((location[1]+location[7])/2)!
+ rows = Int(dist2)
+ cols = Int(dist1)
+ }
+ let dx = Double(abs(locx - centx))
+ let dy = Double(abs(locy - centy))
+ var theta = Double.pi/2
+ print("polygon", result)
+ print("center", centx, centy)
+ print("dx and dy", dx, dy)
+ print("theta initial", theta)
+ if dx != 0 {
+ theta = atan(dy/dx)
+ }
+
+ if locx >= centx && locy < centy{
+ theta = -theta
+ } else if locx < centx && locy >= centy{
+ theta = .pi - theta
+ } else if locx < centx && locy < centy{
+ theta = .pi + theta
+ }
+ print("theta final", theta)
+
+ let rot = Rot2(theta)
+ let vect = Vector2(Double(centx), Double(centy))
+ print("rotation", rot, "\n\n")
+ let center = Pose2(rot, vect)
+ let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols)
+ track.append(swiftBB)
+ } else {
+ let swiftBB = start
+ track.append(swiftBB)
+ }
+ }
+ return track
+ }
+
+ let plt = Python.import("matplotlib.pyplot")
+ let sequenceCount = 20
+ let eval_results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "brando03")
+ print("done evaluating")
+ var total_overlap = eval_results.sequences.prefix(sequenceCount)[0].subsequences.first!.metrics.overlap
+
+ for (index, value) in eval_results.sequences.prefix(sequenceCount).enumerated() {
+
+ print("done,", index)
+ let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
+ fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).")
+ value.subsequences.map {
+ //zip($0.prediction, $0.groundTruth).enumerated().map{($0.0, $0.1.0.center, $0.1.1.center)})
+ let encoder = JSONEncoder()
+ let data = try! encoder.encode($0.prediction)
+ FileManager.default.createFile(atPath: "Results/brando03/prediction_siammask_sequence_\(index).json", contents: data, attributes: nil)
+ }
+ value.subsequences.map {
+ plotPoseDifference(
+ track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0]
+ )
+ }
+ plotOverlap(
+ metrics: value.subsequences.first!.metrics, on: axes[1]
+ )
+
+ fig.savefig("Results/brando03/brando03_subsequence\(index).png", bbox_inches: "tight")
+ print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)")
+ }
+ print("Accuracy for all sequences is \(eval_results.trackerMetrics.accuracy) with Robustness of \(eval_results.trackerMetrics.robustness)")
+
+ let pickle = Python.import("pickle");
+ let f = Python.open("Results/EAO/siammask.data", "wb")
+ pickle.dump(eval_results.expectedAverageOverlap.curve, f)
+
+
+ // var average_overlap = [Double]()
+ // for (i, val) in total_overlap.enumerated() {
+ // average_overlap.append(val/Double(sequenceCount))
+ // }
+ // let (fig, ax) = plt.subplots().tuple2
+ // ax.plot(average_overlap)
+ // ax.set_title("Overlap")
+ // fig.savefig("average_overlap.png")
+
+
+
+
+
+ }
+
+}
\ No newline at end of file
diff --git a/Scripts/Brando04.swift b/Scripts/Brando04.swift
new file mode 100644
index 00000000..d1354f30
--- /dev/null
+++ b/Scripts/Brando04.swift
@@ -0,0 +1,108 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+
+
+
+/// Brando04: NNClassifier training
+struct Brando04: ParsableCommand {
+ typealias LikelihoodModel = TrackingLikelihoodModel
+
+
+ @Flag(help: "Training mode")
+ var training: Bool = false
+
+ let num_boxes: Int = 10000
+
+ func getTrainingDataBG(
+ from dataset: OISTBeeVideo
+ ) -> (Tensor<Float>, Tensor<Float>) {
+ // `Float` scalars assumed; the diff dropped the generic parameters.
+ print("bg")
+ let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+ $0.frame!.patch(at: $0.obb)
+ }
+ print("bg2")
+ // Background patches are labeled 0.
+ let labels = Tensor<Float>(zeros: [num_boxes])
+ print("labels done bg")
+ let patches = Tensor<Float>(stacking: bgBoxes.map {$0})
+ print("patches done bg")
+ return (labels, patches)
+ }
+
+
+
+ func getTrainingDataFG(
+ from dataset: OISTBeeVideo
+ ) -> (Tensor<Float>, Tensor<Float>) {
+ print("fg")
+ let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+ $0.frame!.patch(at: $0.obb)
+ }
+ print("fg2")
+ // Foreground patches are labeled 1 to distinguish them from background (0).
+ let labels = Tensor<Float>(ones: [num_boxes])
+ print("labels done fg")
+ let patches = Tensor<Float>(stacking: fgBoxes.map {$0})
+ print("patches done fg")
+ return (labels, patches)
+ }
+
+
+
+ // Trains NNClassifier variants on foreground/background patches and saves
+ // the learned weights under `classifiers/classifiers_today`.
+ func run() {
+ let folderName = "classifiers/classifiers_today"
+ if !FileManager.default.fileExists(atPath: folderName) {
+ do {
+ try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+ } catch {
+ print(error.localizedDescription)
+ }
+ } else {
+ print("folder exists")
+ }
+
+
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
+ let (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset)
+ let (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset)
+
+ let patches = Tensor(stacking: patches_bg.unstacked() + patches_fg.unstacked())
+ let labels = Tensor(concatenating: [labels_bg, labels_fg])
+ print("shape of patches", patches.shape)
+ print("shape of labels", labels.shape)
+
+ let kHiddenDimension = 512
+ let featSize = 512
+ let iterations = [5,6,7]
+
+ for i in iterations {
+ let path = "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i)_60000boxes_600epochs.npy"
+ if FileManager.default.fileExists(atPath: path) {
+ print("File Already Exists. Abort training")
+ continue
+ }
+ print("Training...")
+ let classifier = PretrainedNNClassifier(
+ patches: patches,
+ labels: labels,
+ given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: "", learningRate: 1e-3),
+ train_mode: "from_scratch"
+ )
+ classifier.save(to: path)
+
+ }
+
+
+
+ }
+}
diff --git a/Scripts/Brando05.swift b/Scripts/Brando05.swift
new file mode 100644
index 00000000..b8a30e9e
--- /dev/null
+++ b/Scripts/Brando05.swift
@@ -0,0 +1,119 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando05: TRACKING with NN Classifier
+struct Brando05: ParsableCommand {
+ @Option(help: "Run for number of frames")
+ var trackLength: Int = 80
+
+ // Runs NNClassifier tracker on n number of sequences and outputs relevant images and statistics
+ func run() {
+ let np = Python.import("numpy")
+ let featureSizes = [256]
+ let kHiddenDimensions = [512]
+ let iterations = [1]
+ let trainingDatasetSize = 100
+
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)!
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+
+ let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+ for featureSize in featureSizes {
+ for kHiddenDimension in kHiddenDimensions {
+ for j in iterations {
+
+
+ let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+
+ var classifier = NNClassifier(
+ imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize
+ )
+ // LOAD THE CLASSIFIER
+ classifier.load(weights: np.load("./classifiers/classifiers_today/large_classifier_weight_\(kHiddenDimension)_\(featureSize)_\(j).npy", allow_pickle: true))
+
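+ // Build a factor-graph tracker around the classifier; `withSampling: true`
+ // requests the sampling-based initialization of `infer` (assumed from the
+ // BeeTracking API used here).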
+ let evalTracker: Tracker = {frames, start in
+ var tracker = makeProbabilisticTracker2(
+ model: classifier,
+ frames: frames,
+ targetSize: (40, 70)
+ )
+ let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true)
+ let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols:70) }
+ return track
+
+ }
+
+ let plt = Python.import("matplotlib.pyplot")
+ let sequenceCount = 1
+ let results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "classifier")
+
+
+ for (index, value) in results.sequences.prefix(sequenceCount).enumerated() {
+ let folderName = "Results/classifier/classifier_\(kHiddenDimension)_\(featureSize)_\(j)_10000sampling"
+ print(folderName)
+ if !FileManager.default.fileExists(atPath: folderName) {
+ do {
+ try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+ try FileManager.default.createDirectory(atPath: folderName + "/sequence0", withIntermediateDirectories: true, attributes: nil)
+ } catch {
+ print(error.localizedDescription)
+ }
+ }
+
+ var i: Int = 0
+ zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map {
+ let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
+ fig.savefig(folderName + "/sequence\(index)/classifier_\(i).png", bbox_inches: "tight")
+ plt.close("all")
+ i = i + 1
+ }
+
+ let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
+ fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).")
+
+ value.subsequences.map {
+ plotPoseDifference(
+ track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0]
+ )
+ }
+ plotOverlap(
+ metrics: value.subsequences.first!.metrics, on: axes[1]
+ )
+
+ fig.savefig(folderName + "/classifier_\(kHiddenDimension)_\(featureSize)_\(j)subsequence\(index).png", bbox_inches: "tight")
+ print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)")
+ }
+
+ print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)")
+
+
+
+
+ }
+ }
+ }
+
+
+
+ }
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+  return (t.head)
+}
diff --git a/Scripts/Brando06.swift b/Scripts/Brando06.swift
new file mode 100644
index 00000000..811469a4
--- /dev/null
+++ b/Scripts/Brando06.swift
@@ -0,0 +1,180 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+
+import PenguinStructures
+
+// This script produces HISTOGRAMS for the output of NN Classifiers
+struct Brando06: ParsableCommand {
+
+ func run() {
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!
+ let batchSize = 3000
+ let fgBoxes = testData.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize)
+ let bgBoxes = testData.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize)
+ let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)})
+ let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)})
+ let np = Python.import("numpy")
+ let kHiddenDimensions = [512]
+ let featSizes = [512]
+ var plt = Python.import("matplotlib.pyplot")
+
+
+ for i in featSizes {
+ for j in kHiddenDimensions {
+ for num in 1...1 {
+
+ let featureSize = i
+ let kHiddenDimension = j
+
+
+ let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+
+ var classifier = NNClassifier(
+ imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
+ hiddenDimension: kHiddenDimension, latentDimension: featureSize
+ )
+ let weightsFile: String? = nil // no override here; fall through to the default weights below
+ if let weightsFile = weightsFile {
+ classifier.load(weights: np.load(weightsFile, allow_pickle: true))
+ } else {
+ classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_512_512_1_doubletraining.npy", allow_pickle: true))
+ }
+
+ let outfg = classifier.classify(fgpatches)
+ let outbg = classifier.classify(bgpatches)
+ let softmaxfg = softmax(outfg)
+ let softmaxbg = softmax(outbg)
+ let folderName = "Results/brando06/classified_images"
+ if !FileManager.default.fileExists(atPath: folderName) {
+ do {
+ try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+ } catch {
+ print(error.localizedDescription)
+ }
+ }
+ for i in 0...30 {
+ //Background
+ var (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2
+ var patch = bgpatches[i,0...,0...,0]
+ var fr = np.squeeze(patch.makeNumpyArray())
+ ax.imshow(fr / 255.0, cmap: "gray")
+ ax.set_title("background image: \noutput index 0: \(softmaxbg[i][0])\noutput index 1: \(softmaxbg[i][1])")
+ fig.savefig(folderName + "/bgpatch\(i).png", bbox_inches: "tight")
+ plt.close("all")
+ //Foreground
+ (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2
+ patch = fgpatches[i,0...,0...,0]
+ fr = np.squeeze(patch.makeNumpyArray())
+ ax.imshow(fr / 255.0, cmap: "gray")
+ ax.set_title("foreground image: \noutput index 0: \(softmaxfg[i][0])\noutput index 1: \(softmaxfg[i][1])")
+ fig.savefig(folderName + "/fgpatch\(i).png", bbox_inches: "tight")
+ plt.close("all")
+ }
+
+ let shapefg = outfg.shape
+ let shapebg = outbg.shape
+
+ var fgsum0 = 0.0
+ var fgsum1 = 0.0
+ var bgsum0 = 0.0
+ var bgsum1 = 0.0
+ var fg0_arr = [Double]()
+ var fg1_arr = [Double]()
+ var bg0_arr = [Double]()
+ var bg1_arr = [Double]()
+ for i in 0...batchSize-1 {
+ fgsum0 += Double(softmaxfg[i,0])!
+ fgsum1 += Double(softmaxfg[i,1])!
+ bgsum0 += Double(softmaxbg[i,0])!
+ bgsum1 += Double(softmaxbg[i,1])!
+ fg0_arr.append(Double(softmaxfg[i,0])!)
+ fg1_arr.append(Double(softmaxfg[i,1])!)
+ bg0_arr.append(Double(softmaxbg[i,0])!)
+ bg1_arr.append(Double(softmaxbg[i,1])!)
+ }
+ print("featSize", featureSize, "kHiddendimension", kHiddenDimension, "num", num, "val", fgsum1 + bgsum0 - fgsum0 - bgsum1)
+
+
+
+
+ print("feature size", featureSize)
+ print("fgsum1", fgsum1, "fgsum0", fgsum0)
+ print("bgsum1", bgsum1, "bgsum0", bgsum0)
+
+ let (figs, axs) = plt.subplots(2,2).tuple2
+
+ plt.subplots_adjust(left:0.1,
+ bottom:0.1,
+ right:0.9,
+ top:0.9,
+ wspace:0.4,
+ hspace:0.4)
+
+
+ var ax1 = axs[1,0]
+ ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50)
+ var mean = fgsum0/Double(batchSize)
+ var sd = 0.0
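+ // NOTE: the loop below accumulates the mean absolute deviation, which the
+ // subplot title reports as "SD".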
+ for elem in fg0_arr {
+ sd += abs(elem - mean)/Double(batchSize)
+ }
+ ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8)
+
+ ax1 = axs[0,0]
+ ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50)
+ mean = fgsum1/Double(batchSize)
+ sd = 0.0
+ for elem in fg1_arr {
+ sd += abs(elem - mean)/Double(batchSize)
+ }
+ ax1.set_title("Foreground. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8)
+
+ ax1 = axs[1,1]
+ ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50)
+ mean = bgsum0/Double(batchSize)
+ sd = 0.0
+ for elem in bg0_arr {
+ sd += abs(elem - mean)/Double(batchSize)
+ }
+ ax1.set_title("Background. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8)
+
+ ax1 = axs[0,1]
+
+ ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50)
+ mean = bgsum1/Double(batchSize)
+ sd = 0.0
+ for elem in bg1_arr {
+ sd += abs(elem - mean)/Double(batchSize)
+ }
+ ax1.set_title("Background. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8)
+
+ figs.savefig("hist_softmax_\(kHiddenDimension)_\(featureSize)_\(num).png")
+ plt.close(figs)
+
+
+
+ }
+ }
+ }
+
+
+
+
+
+
+
+
+
+
+ }
+}
diff --git a/Scripts/Brando07.swift b/Scripts/Brando07.swift
new file mode 100644
index 00000000..e2e55e31
--- /dev/null
+++ b/Scripts/Brando07.swift
@@ -0,0 +1,172 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando07: RAE + Prob density histograms
+struct Brando07: ParsableCommand {
+ @Option(help: "Run for number of frames")
+ var trackLength: Int = 80
+
+ @Option(help: "Size of feature space")
+ var featureSize: Int = 256
+
+ @Option(help: "Pretrained weights")
+ var weightsFile: String?
+
+ func run() {
+ let np = Python.import("numpy")
+ let kHiddenDimension = 512
+
+ let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+
+
+ var rae = DenseRAE(
+ imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
+ hiddenDimension: kHiddenDimension, latentDimension: featureSize
+ )
+
+ if let weightsFile = weightsFile {
+ rae.load(weights: np.load(weightsFile, allow_pickle: true))
+ } else {
+ rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true))
+ }
+ print("s")
+
+ let trainingDatasetSize = 100
+
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let numberOfTrainingSamples = 3000
+
+ let dataset = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! // constructing this twice got the process OOM-killed
+ let batchSize = 3000
+ let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize)
+ print("here 1.5")
+ let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize)
+ print("tests here2")
+ let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)})
+ let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)})
+ print("patches complete")
+
+
+ let batchPositive = rae.encode(fgpatches)
+ print("shape batch positive", batchPositive.shape)
+ let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3)
+ let batchNegative = rae.encode(bgpatches)
+ let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3)
+
+ var outfg0 = [Double]()
+ var outfg1 = [Double]()
+ var outbg0 = [Double]()
+ var outbg1 = [Double]()
+ print(batchPositive[0,0...].shape)
+ print(backgroundModel.probability(batchPositive[0,0...]))
+ print(foregroundModel.probability(batchPositive[0,0...]))
+
+ for i in 0...numberOfTrainingSamples-1 {
+ outfg0.append(backgroundModel.probability(batchPositive[i,0...]))
+ outfg1.append(foregroundModel.probability(batchPositive[i,0...]))
+ outbg0.append(backgroundModel.probability(batchNegative[i,0...]))
+ outbg1.append(foregroundModel.probability(batchNegative[i,0...]))
+ }
+
+
+ var plt = Python.import("matplotlib.pyplot")
+
+
+ var fgsum0 = 0.0
+ var fgsum1 = 0.0
+ var bgsum0 = 0.0
+ var bgsum1 = 0.0
+ var fg0_arr = [Double]()
+ var fg1_arr = [Double]()
+ var bg0_arr = [Double]()
+ var bg1_arr = [Double]()
+ for i in 0...batchSize-1 {
+ fgsum0 += (outfg0[i])
+ fgsum1 += (outfg1[i])
+ bgsum0 += (outbg0[i])
+ bgsum1 += (outbg1[i])
+ fg0_arr.append((outfg0[i]))
+ fg1_arr.append((outfg1[i]))
+ bg0_arr.append((outbg0[i]))
+ bg1_arr.append((outbg1[i]))
+ }
+ print("featSize", featureSize, "kHiddendimension", kHiddenDimension, "val", fgsum1 + bgsum0 - fgsum0 - bgsum1)
+
+
+
+
+ print("feature size", featureSize)
+ print("fgsum1", fgsum1, "fgsum0", fgsum0)
+ print("bgsum1", bgsum1, "bgsum0", bgsum0)
+
+ let (figs, axs) = plt.subplots(2,2).tuple2
+
+ plt.subplots_adjust(left:0.1,
+ bottom:0.1,
+ right:0.9,
+ top:0.9,
+ wspace:0.4,
+ hspace:0.4)
+
+
+ var ax1 = axs[1,0]
+ ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50)
+ var mean = fgsum0/Double(batchSize)
+ var sd = 0.0
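+ // NOTE: the loop below accumulates the mean absolute deviation, which the
+ // subplot title reports as "SD".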
+ for elem in fg0_arr {
+ sd += abs(elem - mean)/Double(batchSize)
+ }
+ ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8)
+
+ ax1 = axs[0,0]
+ ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50)
+ mean = fgsum1/Double(batchSize)
+ sd = 0.0
+ for elem in fg1_arr {
+ sd += abs(elem - mean)/Double(batchSize)
+ }
+ ax1.set_title("Foreground. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8)
+
+ ax1 = axs[1,1]
+ ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50)
+ mean = bgsum0/Double(batchSize)
+ sd = 0.0
+ for elem in bg0_arr {
+ sd += abs(elem - mean)/Double(batchSize)
+ }
+ ax1.set_title("Background. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8)
+
+ ax1 = axs[0,1]
+
+ ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50)
+ mean = bgsum1/Double(batchSize)
+ sd = 0.0
+ for elem in bg1_arr {
+ sd += abs(elem - mean)/Double(batchSize)
+ }
+ ax1.set_title("Background. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8)
+
+ figs.savefig("hist_rae_\(kHiddenDimension)_\(featureSize).png")
+ plt.close(figs)
+
+
+
+ }
+
+
+
+
+
+}
+
+
diff --git a/Scripts/Brando08.swift b/Scripts/Brando08.swift
new file mode 100644
index 00000000..f4d414c1
--- /dev/null
+++ b/Scripts/Brando08.swift
@@ -0,0 +1,47 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+
+import PenguinStructures
+
+// PRINT IMAGE PATCHES TO VISUALIZE
+struct Brando08: ParsableCommand {
+
+ func run() {
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let dataset = OISTBeeVideo(directory: dataDir, length: 100)!
+ let batchSize = 300
+ let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize)
+ let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize)
+ let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)})
+ let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)})
+ let np = Python.import("numpy")
+ var plt = Python.import("matplotlib.pyplot")
+ let mpl = Python.import("matplotlib")
+
+ print(fgpatches.shape)
+ for i in batchSize-100...batchSize-1 {
+ let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2
+ let patch = bgpatches[i,0...,0...,0]
+ let fr = np.squeeze(patch.makeNumpyArray())
+ ax.imshow(fr / 255.0, cmap: "gray")
+ let folderName = "Results/brando08/bgpatches"
+ if !FileManager.default.fileExists(atPath: folderName) {
+ do {
+ try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+ } catch {
+ print(error.localizedDescription)
+ }
+ }
+ fig.savefig("Results/brando08/bgpatches/patch\(i).png", bbox_inches: "tight")
+ plt.close("all")
+
+ }
+ }
+}
diff --git a/Scripts/Brando10.swift b/Scripts/Brando10.swift
new file mode 100644
index 00000000..827f8dcd
--- /dev/null
+++ b/Scripts/Brando10.swift
@@ -0,0 +1,64 @@
+
+
+
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// Brando10: Plot the samplings in progress.
+struct Brando10: ParsableCommand {
+ @Option(help: "Run for number of frames")
+ var trackLength: Int = 80
+
+ // Plots the per-frame sampling traces saved by a previous tracking run.
+ func run() {
+ let np = Python.import("numpy")
+ let featureSizes = [512]
+ let kHiddenDimensions = [512]
+ let iterations = [1]
+ let trainingDatasetSize = 100
+
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)!
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+
+ let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+ for i in 0...78 {
+ let folderName = "./sampling_512_512_2000samples"
+ let posex_np = np.load(folderName + "/sampling_frame_\(i)_posex.npy")
+ let posey_np = np.load(folderName + "/sampling_frame_\(i)_posey.npy")
+ let posetheta_np = np.load(folderName + "/sampling_frame_\(i)_posetheta.npy")
+ let error_np = np.load(folderName + "/sampling_frame_\(i)_error.npy")
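+ // The pose traces are loaded but currently unused; only the error trace is plotted below.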
+ let t = np.arange(0, 2000, 1)
+
+
+ let plt = Python.import("matplotlib.pyplot")
+ // plt.subplots(1, 1) returns a single Axes (not an array), so use it directly.
+ let (figs, axs) = plt.subplots(1, 1, figsize: Python.tuple([10, 4])).tuple2
+
+ axs.plot(t, error_np, linewidth: 1)
+ axs.set_title("error")
+ plt.subplots_adjust(left:0.1,
+ bottom:0.1,
+ right:0.9,
+ top:0.9,
+ wspace:0.4,
+ hspace:0.4)
+
+ figs.savefig(folderName + "/sampling_figure_\(i).png")
+ plt.close("all")
+
+ }
+
+
+
+ }
+}
\ No newline at end of file
diff --git a/Scripts/Brando11.swift b/Scripts/Brando11.swift
new file mode 100644
index 00000000..671be2b5
--- /dev/null
+++ b/Scripts/Brando11.swift
@@ -0,0 +1,42 @@
+import ArgumentParser
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import PenguinStructures
+
+/// Brando11: compute the mean displacement
+struct Brando11: ParsableCommand {
+ @Option(help: "Run for number of frames")
+ var trackLength: Int = 80
+
+ func run() {
+ let np = Python.import("numpy")
+ let plt = Python.import("matplotlib.pyplot")
+ let trainingDatasetSize = 100
+
+ // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let data = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+ var dX = [Double]()
+ var dY = [Double]()
+ var dTheta = [Double]()
+ for track in data.tracks {
+ var prevObb: OrientedBoundingBox? = nil
+ for obb in track.boxes {
+ if let prev = prevObb {
+ dX.append(obb.center.t.x - prev.center.t.x)
+ dY.append(obb.center.t.y - prev.center.t.y)
+ dTheta.append(obb.center.rot.theta - prev.center.rot.theta)
+ }
+ // Advance the reference box so displacements are frame-to-frame, not
+ // relative to the first box (the original never updated `prevObb`).
+ prevObb = obb
+ }
+ }
+ // Plot histogram.
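+ // A minimal sketch of the missing plotting step (output path assumed):
+ // histogram each displacement component and save one figure.
+ let (fig, axs) = plt.subplots(1, 3, figsize: Python.tuple([15, 4])).tuple2
+ axs[0].hist(np.array(dX), bins: 50)
+ axs[0].set_title("dX (pixels)")
+ axs[1].hist(np.array(dY), bins: 50)
+ axs[1].set_title("dY (pixels)")
+ axs[2].hist(np.array(dTheta), bins: 50)
+ axs[2].set_title("dTheta (radians)")
+ fig.savefig("Results/brando11/displacement_histograms.png", bbox_inches: "tight")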
+
+ }
+}
diff --git a/Scripts/Brando12.swift b/Scripts/Brando12.swift
new file mode 100644
index 00000000..fad930f2
--- /dev/null
+++ b/Scripts/Brando12.swift
@@ -0,0 +1,311 @@
+import ArgumentParser
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import PenguinStructures
+
+/// Brando12: OPTIMIZATION CONVERGENCE VISUALIZATION [red & green dots]
+struct Brando12: ParsableCommand {
+ @Option(help: "Run for number of frames")
+ var trackLength: Int = 80
+
+ @Option(help: "Classifier or rae")
+ var useClassifier: Bool = true
+
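+ /// Stores `p` in fresh variable assignments, perturbs it with a fixed
+ /// standard deviation, and returns the offsets together with an empty
+ /// factor graph for the caller to populate.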
+ func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) {
+ // CREATE A PLACEHOLDER FOR POSE
+ var v = VariableAssignments()
+ let poseId = v.store(p)
+ v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6))
+ let dx = v[poseId].t.x - p.t.x
+ let dy = v[poseId].t.y - p.t.y
+ let dtheta = v[poseId].rot.theta - p.rot.theta
+ let startpose = v[poseId]
+ let fg = FactorGraph()
+
+ return (dx, dy, dtheta, startpose, v, poseId, fg)
+ }
+
+ func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) {
+ let conv = true
+ let errors = [Double]()
+ let xs = [Double]()
+ let ys = [Double]()
+ let thetas = [Double]()
+ return (conv, errors, xs, ys, thetas)
+ }
+
+
+
+ func run() {
+ let np = Python.import("numpy")
+ let plt = Python.import("matplotlib.pyplot")
+ let trainingDatasetSize = 100
+
+ // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+ let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)!
+ let frames = testData.frames
+ let firstTrack = testData.tracks[0]
+ let firstFrame = frames[0]
+ let firstObb = firstTrack.boxes[0]
+
+
+ //OPTIMIZER GRADIENT DESCENT
+ let lr = 1e-7
+ var optimizer = GradientDescent(learningRate: lr)
+ let it_limit = 200
+
+
+ //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION
+ let str: String
+ if useClassifier{
+ str = "NNC"
+ } else {
+ str = "RAE"
+ }
+ let folderName = "Results/GD_optimization_\(str)_lr_\(lr)__3_09_2021_final_images_4subplots"
+ if !FileManager.default.fileExists(atPath: folderName) {
+ do {
+ try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+ } catch {
+ print(error.localizedDescription)
+ }
+ }
+
+
+
+
+
+ //CREATE A FIG
+ print("hello1")
+ let (fig, axs) = plt.subplots(2,2).tuple2
+ let fr = np.squeeze(firstFrame.makeNumpyArray())
+ for i in 0...1 {
+ for j in 0...1 {
+ axs[i,j].imshow(fr / 255.0, cmap: "gray")
+ let firstGroundTruth = firstObb.center
+ axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50)
+ axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50)
+ axs[i,j].get_xaxis().set_visible(false)
+ axs[i,j].get_yaxis().set_visible(false)
+ }
+ }
+ axs[0,0].set_title("fabs(theta) < 0.1", fontsize:8)
+ axs[0,1].set_title("fabs(theta) < 0.2", fontsize:8)
+ axs[1,0].set_title("fabs(theta) < 0.3", fontsize:8)
+ axs[1,1].set_title("fabs(theta) >= 0.3", fontsize:8)
+
+ print("hello")
+ let xy_thresh = 20.0 //pixels
+ let theta_thresh = 0.5 //radians // consider doing overlap.
+
+
+ // NN Params
+ let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+ let featureSize = 256
+ let kHiddenDimension = 512
+
+
+ if useClassifier {
+ var classifier = NNClassifier(
+ imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize
+ )
+ classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true))
+
+ for j in 0...200 {
+ // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION
+ var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center)
+ // CREATE THE FACTOR AND FACTOR GRAPH
+ let factorNNC = ProbablisticTrackingFactor2(poseId,
+ measurement: firstFrame,
+ classifier: classifier,
+ patchSize: (40, 70),
+ appearanceModelSize: (40, 70)
+ )
+ fg.store(factorNNC)
+
+
+ // PERFORM GRADIENT DESCENT
+ var (conv, errors, xs, ys, thetas) = initialize_empty_arrays()
+ print("starting optimization")
+ for i in 0..<it_limit {
+ // NOTE: the loop body was garbled in the diff; this reconstruction assumes
+ // one GradientDescent step per iteration while recording the trace arrays
+ // used by the plots below.
+ errors.append(fg.error(at: v))
+ xs.append(v[poseId].t.x)
+ ys.append(v[poseId].t.y)
+ thetas.append(v[poseId].rot.theta)
+ optimizer.update(&v, objective: fg)
+ // Assumed stopping rule: quit early once the error stops improving.
+ if errors.count >= 2 && abs(errors[errors.count - 1] - errors[errors.count - 2]) < 1e-6 { break }
+ if i == it_limit - 1 { conv = false }
+ }
+ // CHECK WHETHER THE FINAL POSE LANDED WITHIN THE CONVERGENCE THRESHOLDS
+ let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh)
+ let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh)
+ let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh)
+ if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds {
+
+ if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 {
+ axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 {
+ axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 {
+ axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1)
+ } else {
+ axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1)
+ }
+
+ } else {
+
+ if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 {
+ axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 {
+ axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 {
+ axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1)
+ } else {
+ axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1)
+ }
+ }
+ let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas)
+ let final_err = factorNNC.errorVector(v[poseId]).x
+ let label_err = factorNNC.errorVector(firstObb.center).x
+ let start_err = factorNNC.errorVector(startpose).x
+
+ axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)"
+ + "\n label err = \(label_err)"
+ + "\n start err = \(start_err)"
+ + "\n learning rate = \(lr)"
+ + "\n converged = \(conv)")
+ figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight")
+ plt.close("all")
+ fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight")
+
+
+
+
+ }
+
+
+
+
+
+
+ } else {
+ // LOAD RAE AND TRAIN BG AND FG MODELS
+ var rae = DenseRAE(
+ imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
+ hiddenDimension: kHiddenDimension, latentDimension: featureSize
+ )
+ rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true))
+ let (fg, bg, _) = getTrainingBatches(
+ dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000,
+ fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true
+ )
+ let batchPositive = rae.encode(fg)
+ let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3)
+ let batchNegative = rae.encode(bg)
+ let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3)
+
+ for j in 0...200 {
+
+ // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION
+ var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center)
+ // CREATE THE FACTOR AND FACTOR GRAPH
+ let factorRAE = ProbablisticTrackingFactor(poseId,
+ measurement: firstFrame,
+ encoder: rae,
+ patchSize: (40, 70),
+ appearanceModelSize: (40, 70),
+ foregroundModel: foregroundModel,
+ backgroundModel: backgroundModel,
+ maxPossibleNegativity: 1e7
+ )
+ fg.store(factorRAE)
+ // PERFORM GRADIENT DESCENT
+ var (conv, errors, xs, ys, thetas) = initialize_empty_arrays()
+ print("starting optimization")
+ for i in 0..<it_limit {
+ // NOTE: the loop body was garbled in the diff; this reconstruction assumes
+ // one GradientDescent step per iteration while recording the trace arrays
+ // used by the plots below.
+ errors.append(fg.error(at: v))
+ xs.append(v[poseId].t.x)
+ ys.append(v[poseId].t.y)
+ thetas.append(v[poseId].rot.theta)
+ optimizer.update(&v, objective: fg)
+ // Assumed stopping rule: quit early once the error stops improving.
+ if errors.count >= 2 && abs(errors[errors.count - 1] - errors[errors.count - 2]) < 1e-6 { break }
+ if i == it_limit - 1 { conv = false }
+ }
+ // CHECK WHETHER THE FINAL POSE LANDED WITHIN THE CONVERGENCE THRESHOLDS
+ let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh)
+ let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh)
+ let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh)
+ if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds {
+
+ if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 {
+ axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 {
+ axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 {
+ axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1)
+ } else {
+ axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1)
+ }
+
+ } else {
+ if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 {
+ axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 {
+ axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1)
+ } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 {
+ axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1)
+ } else {
+ axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1)
+ }
+ }
+ let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas)
+ let final_err = factorRAE.errorVector(v[poseId]).x
+ let label_err = factorRAE.errorVector(firstObb.center).x
+ let start_err = factorRAE.errorVector(startpose).x
+
+ axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)"
+ + "\n label err = \(label_err)"
+ + "\n start err = \(start_err)"
+ + "\n learning rate = \(lr)"
+ + "\n converged = \(conv)")
+ figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight")
+ plt.close("all")
+ fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight")
+
+ }
+ }
+ }
+}
diff --git a/Scripts/Brando13.swift b/Scripts/Brando13.swift
new file mode 100644
index 00000000..8678c7cb
--- /dev/null
+++ b/Scripts/Brando13.swift
@@ -0,0 +1,421 @@
+import ArgumentParser
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import PenguinStructures
+
+/// Brando13: OPTIMIZATION CONVERGENCE VISUALIZATION with LM
+struct Brando13: ParsableCommand {
+ @Option(help: "Run for number of frames")
+ var trackLength: Int = 80
+
+ @Option(help: "Classifier or rae")
+ var useClassifier: Bool = false
+
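+ /// Stores `p` in fresh variable assignments, perturbs it with a fixed
+ /// standard deviation, and returns the offsets together with an empty
+ /// factor graph for the caller to populate.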
+ func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) {
+ // CREATE A PLACEHOLDER FOR POSE
+ var v = VariableAssignments()
+ let poseId = v.store(p)
+ v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6))
+ let dx = v[poseId].t.x - p.t.x
+ let dy = v[poseId].t.y - p.t.y
+ let dtheta = v[poseId].rot.theta - p.rot.theta
+ let startpose = v[poseId]
+ let fg = FactorGraph()
+
+ return (dx, dy, dtheta, startpose, v, poseId, fg)
+ }
+
+ func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) {
+ let conv = true
+ let errors = [Double]()
+ let xs = [Double]()
+ let ys = [Double]()
+ let thetas = [Double]()
+ return (conv, errors, xs, ys, thetas)
+ }
+
+
+
+ func run() {
+ let np = Python.import("numpy")
+ let plt = Python.import("matplotlib.pyplot")
+ let trainingDatasetSize = 100
+
+ // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+ let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)!
+ let frames = testData.frames
+ let firstTrack = testData.tracks[0]
+ // let firstTrack = testData.tracks[5]
+ let firstFrame = frames[0]
+ let firstObb = firstTrack.boxes[0]
+ // let firstObb = firstTrack.boxes[5]
+
+
+ //OPTIMIZER GRADIENT DESCENT
+ // let lr = 1e-7
+ // var optimizer = GradientDescent(learningRate: lr)
+ let it_limit = 200
+ /// The set of steps taken.
+ var step: Int = 0
+
+ /// Desired precision, TODO(fan): make this actually work
+ var precision: Double = 1e-10
+
+ /// The precision of the CGLS solver.
+ var cgls_precision: Double = 1e-10
+
+ /// Maximum number of L-M iterations
+ var max_iteration: Int = 50
+
+ /// Maximum number of G-N iterations
+ var max_inner_iteration: Int = 400
+
+ /// Maximum Lambda
+ var max_lambda: Double = 1e32
+
+ /// Minimum Lambda
+ var min_lambda: Double = 1e-16
+
+ /// Initial Lambda
+ // var initial_lambda: Double = 1e-4
+ var initial_lambda: Double = 1e7
+
+ /// Lambda Factor
+ var lambda_factor: Double = 2
+
+
+ //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION
+ let str: String
+ if useClassifier{
+ str = "NNC"
+ } else {
+ str = "RAE"
+ }
+ let folderName = "Results/LM_optimization_\(str)__17_09_2021_final_images_4subplots"
+ if !FileManager.default.fileExists(atPath: folderName) {
+ do {
+ try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+ } catch {
+ print(error.localizedDescription)
+ }
+ }
+
+
+
+
+
+ //CREATE A FIG
+ print("hello1")
+ let (fig, axs) = plt.subplots(2,2).tuple2
+ let fr = np.squeeze(firstFrame.makeNumpyArray())
+ for i in 0...1 {
+ for j in 0...1 {
+ axs[i,j].imshow(fr / 255.0, cmap: "gray")
+ let firstGroundTruth = firstObb.center
+ // axs[i,j].plot(firstObb.corners.map{$0.x} + [firstObb.corners.first!.x], firstObb.corners.map{$0.y} + [firstObb.corners.first!.y], "b-")
+ axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50)
+ axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50)
+ axs[i,j].get_xaxis().set_visible(false)
+ axs[i,j].get_yaxis().set_visible(false)
+ }
+ }
+ axs[0,0].set_title("fabs(theta) < 5deg", fontsize:8)
+ axs[0,1].set_title("fabs(theta) < 10deg", fontsize:8)
+ axs[1,0].set_title("fabs(theta) < 25deg", fontsize:8)
+ axs[1,1].set_title("fabs(theta) >= 25deg", fontsize:8)
+
+ print("hello")
+ let xy_thresh = 20.0 //pixels
+ let theta_thresh = 0.5 //radians // consider doing overlap.
+
+
+ // NN Params
+ let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+ let featureSize = 256
+ let kHiddenDimension = 512
+
+
+ if useClassifier {
+ print("using classifier")
+ // var classifier = NNClassifier(
+ // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize
+ // )
+ // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true))
+
+ // for j in 0...200 {
+ // // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION
+ // var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center)
+ // // CREATE THE FACTOR AND FACTOR GRAPH
+ // let factorNNC = ProbablisticTrackingFactor2(poseId,
+ // measurement: firstFrame,
+ // classifier: classifier,
+ // patchSize: (40, 70),
+ // appearanceModelSize: (40, 70)
+ // )
+ // fg.store(factorNNC)
+ // print(firstObb.center)
+
+
+ // // PERFORM GRADIENT DESCENT
+ // var (conv, errors, xs, ys, thetas) = initialize_empty_arrays()
+ // print("starting optimization")
+ // for i in 0..<it_limit {
+ //   errors.append(fg.error(at: v))
+ //   xs.append(v[poseId].t.x)
+ //   ys.append(v[poseId].t.y)
+ //   thetas.append(v[poseId].rot.theta)
+ //   optimizer.update(&v, objective: fg)
+ // }
+ // (commented-out loop body reconstructed; the original lines were garbled)
+ // let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh)
+ // let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh)
+ // let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh)
+ // if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds {
+ // // plot a green dot
+ // // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",")
+ // // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",")
+ // if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 {
+ // axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2)
+ // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 {
+ // axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2)
+ // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 {
+ // axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2)
+ // } else {
+ // axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2)
+ // }
+
+ // } else {
+ // // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",")
+ // // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",")
+ // if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 {
+ // axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2)
+ // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 {
+ // axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2)
+ // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 {
+ // axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2)
+ // } else {
+ // axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2)
+ // }
+ // }
+ // let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas)
+ // var final_err: Double
+ // var label_err: Double
+ // var start_err: Double
+
+
+ // final_err = factorNNC.errorVector(v[poseId]).x
+ // label_err = factorNNC.errorVector(firstObb.center).x
+ // start_err = factorNNC.errorVector(startpose).x
+
+ // axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)"
+ // + "\n label err = \(label_err).x)"
+ // + "\n start err = \(start_err)"
+ // + "\n learning rate = \(lr)"
+ // + "\n converged = \(conv)")
+ // figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight")
+ // // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas)
+ // // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight")
+ // plt.close("all")
+ // fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight")
+
+
+
+
+ // }
+
+
+
+
+
+
+ } else {
+ // LOAD RAE AND TRAIN BG AND FG MODELS
+ var rae = DenseRAE(
+ imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
+ hiddenDimension: kHiddenDimension, latentDimension: featureSize
+ )
+ rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true))
+ let (fg, bg, _) = getTrainingBatches(
+ dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000,
+ fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true
+ )
+ let batchPositive = rae.encode(fg)
+ let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3)
+ let batchNegative = rae.encode(bg)
+ let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3)
+
+ for j in 0...200 {
+
+ // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION
+ var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center)
+ // CREATE THE FACTOR AND FACTOR GRAPH
+ let factorRAE = ProbablisticTrackingFactor(poseId,
+ measurement: firstFrame,
+ encoder: rae,
+ patchSize: (40, 70),
+ appearanceModelSize: (40, 70),
+ foregroundModel: foregroundModel,
+ backgroundModel: backgroundModel,
+ maxPossibleNegativity: 1e7
+ )
+ fg.store(factorRAE)
+ print(firstObb.center)
+ // PERFORM GRADIENT DESCENT
+ var (conv, errors, xs, ys, thetas) = initialize_empty_arrays()
+ print("starting optimization LM")
+ var old_error = fg.linearizableError(at: v)
+
+ var lambda: Double = initial_lambda
+ var inner_iter_step = 0
+ var inner_success = false
+ var all_done = false
+ precision = 1e-10
+ max_iteration = 50
+ step = 0
+ for i in 0..<max_iteration {
+ // NOTE: the damped Gauss-Newton step below was garbled in the diff; this
+ // reconstruction follows the structure of SwiftFusion's LM optimizer
+ // (linearize, damp the Jacobians with lambda, solve with CGLS, score the
+ // step with a model-fidelity test). The exact details are assumptions.
+ errors.append(fg.error(at: v))
+ xs.append(v[poseId].t.x)
+ ys.append(v[poseId].t.y)
+ thetas.append(v[poseId].rot.theta)
+ let gfg = fg.linearized(at: v)
+ inner_iter_step = 0
+ inner_success = false
+ while true {
+ var damped = gfg
+ damped.addScalarJacobians(lambda)
+ var dx_t = v.tangentVectorZeros
+ var cgls = GenericCGLS(precision: cgls_precision, max_iteration: max_inner_iteration)
+ cgls.optimize(gfg: damped, initial: &dx_t)
+ let oldval = v
+ v.move(along: dx_t)
+ let this_error = fg.error(at: v)
+ let delta_error = old_error - this_error
+ let new_linear_error = damped.errorVectors(at: dx_t).squaredNorm / 2
+ let model_fidelity = delta_error / (old_error - new_linear_error)
+ if delta_error > .ulpOfOne && model_fidelity > 0.01 {
+ old_error = this_error
+
+ // Success, decrease lambda
+ if lambda > min_lambda {
+ lambda = lambda / lambda_factor
+ }
+
+ inner_success = true
+ } else {
+
+ // increase lambda and retry
+ v = oldval
+ if lambda > max_lambda {
+          print("LM: lambda exceeded max_lambda, giving up on this inner loop")
+ break
+ }
+ lambda = lambda * lambda_factor
+ }
+
+      if (model_fidelity > 0.5 && delta_error < precision) || this_error < precision {
+ inner_success = true
+ all_done = true
+ break
+ }
+
+ inner_iter_step += 1
+ if inner_success {
+ break
+ }
+ }
+
+ step += 1
+
+ if all_done {
+ break
+ }
+ if i == max_iteration-1 {
+ conv = false
+ }
+ }
+
+
+ // PLOT THE FINAL OPTIMIZATION RESULT
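+      // A run counts as converged when the final pose lands within `xy_thresh`
+      // pixels and `theta_thresh` radians of the ground truth. Each starting
+      // pose is drawn as a dot (green = converged, red = not) and binned into
+      // the 2x2 subplot grid by its initial rotation offset
+      // (<5, <10, <25, or >=25 degrees).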
+ let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh)
+ let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh)
+ let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh)
+ if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds {
+ // plot a green dot
+ // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",")
+ // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",")
+ if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 {
+ axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2)
+ } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 {
+ axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2)
+ } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 {
+ axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2)
+ } else {
+ axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2)
+ }
+
+ } else {
+ // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",")
+ // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",")
+ if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 {
+ axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2)
+ } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 {
+ axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2)
+ } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 {
+ axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2)
+ } else {
+ axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2)
+ }
+ }
+ let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas)
+ var final_err: Double
+ var label_err: Double
+ var start_err: Double
+
+ final_err = factorRAE.errorVector(v[poseId]).x
+ label_err = factorRAE.errorVector(firstObb.center).x
+ start_err = factorRAE.errorVector(startpose).x
+
+ axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)"
+            + "\n label err = \(label_err)"
+ + "\n start err = \(start_err)"
+ + "\n converged = \(conv)")
+ figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight")
+ // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas)
+ // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight")
+ plt.close("all")
+      fig.savefig(folderName + "/optimization_convergence_red_n_green_dots.png", bbox_inches: "tight")
+
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/Scripts/Brando14.swift b/Scripts/Brando14.swift
new file mode 100644
index 00000000..70d64f2b
--- /dev/null
+++ b/Scripts/Brando14.swift
@@ -0,0 +1,199 @@
+import ArgumentParser
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import PenguinStructures
+
+/// Brando14: visualize the factor error value over a window of the image
+struct Brando14: ParsableCommand {
+ @Option(help: "Run for number of frames")
+ var trackLength: Int = 80
+
+ @Option(help: "Classifier or rae")
+ var useClassifier: Bool = true
+
+
+ func run() {
+ let np = Python.import("numpy")
+ let plt = Python.import("matplotlib.pyplot")
+ let trainingDatasetSize = 100
+
+ // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
+ let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)!
+ let frames = testData.frames
+ let firstTrack = testData.tracks[0]
+ let firstFrame = frames[0]
+ let firstObb = firstTrack.boxes[0]
+
+ let range = 100.0
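+    // Sweep a `range` x `range` grid of translations around the ground-truth
+    // pose, evaluate the tracking factor's error at each offset, and render the
+    // resulting error surface next to the frame.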
+
+ // NN Params
+ let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+ let featureSize = 512
+ let kHiddenDimension = 512
+
+
+ //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION
+ let str: String
+    if useClassifier {
+ str = "NNC"
+ } else {
+ str = "RAE"
+ }
+ let lr = 1e-6
+ let folderName = "Results/ErrorValueVizualized_\(str)_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy"
+ if !FileManager.default.fileExists(atPath: folderName) {
+ do {
+ try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+ } catch {
+ print(error.localizedDescription)
+ }
+ }
+
+
+
+ let firstGroundTruth = firstObb.center
+ print("oBB coordinates", firstGroundTruth.t.x, firstGroundTruth.t.y)
+
+    // CREATE A FIGURE WITH THE FRAME AND THE ERROR SURFACE SIDE BY SIDE
+    print("Creating figure")
+ let (fig, axs) = plt.subplots(1,2).tuple2
+ let fr = np.squeeze(firstFrame.makeNumpyArray())
+ axs[0].imshow(fr / 255.0, cmap: "gray")
+
+
+ axs[0].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2)
+ axs[0].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2)
+ axs[1].set_xlim(0, range)
+ axs[1].set_ylim(0, range)
+
+ let x = firstGroundTruth.t.x
+ let y = firstGroundTruth.t.y
+
+
+ var values = Tensor(zeros: [Int(range), Int(range)])
+ print("printing tensor",values)
+
+ if useClassifier {
+ var classifier = NNClassifier(
+ imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize
+ )
+ classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_512_512_1_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy", allow_pickle: true))
+
+ print("done loading")
+ for i in 0...Int(range)-1 {
+ for j in 0...Int(range)-1 {
+ let t = Vector2(x-range/2+Double(i), y-range/2+Double(j))
+ // print("here3")
+ let p = Pose2(firstGroundTruth.rot, t)
+ var v = VariableAssignments()
+ let poseId = v.store(p)
+ let startpose = v[poseId]
+ var fg = FactorGraph()
+ // CREATE THE FACTOR AND FACTOR GRAPH
+ let factorNNC = ProbablisticTrackingFactor2(poseId,
+ measurement: firstFrame,
+ classifier: classifier,
+ patchSize: (40, 70),
+ appearanceModelSize: (40, 70)
+ )
+ fg.store(factorNNC)
+ values[i,j] = Tensor(factorNNC.errorVector(v[poseId]).x)
+ // print(Tensor(factorNNC.errorVector(v[poseId]).x))
+ }
+ print("row", i)
+ }
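+    // Shift the error surface to be non-negative, then scale it to [0, 255] so
+    // it can be rendered as an image.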
+ let min_val = values.min()
+ if Double(min_val)! < 0 {
+ values = values-min_val
+ }
+ values = values/values.max()*255
+ print(values[0...,0])
+ print(values.shape)
+ axs[1].imshow(values.makeNumpyArray())
+ fig.savefig(folderName + "/vizual_NNC.png", bbox_inches: "tight")
+ } else {
+ print("RAE")
+ // LOAD RAE AND TRAIN BG AND FG MODELS
+ var rae = DenseRAE(
+ imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
+ hiddenDimension: kHiddenDimension, latentDimension: featureSize
+ )
+ rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true))
+ let (fg, bg, _) = getTrainingBatches(
+ dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000,
+ fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true
+ )
+ let batchPositive = rae.encode(fg)
+      let foregroundModel = MultivariateGaussian(from: batchPositive, regularizer: 1e-3)
+ let batchNegative = rae.encode(bg)
+ let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3)
+ for i in 0...Int(range)-1 {
+ for j in 0...Int(range)-1 {
+        let t = Vector2(x-range/2+Double(i), y-range/2+Double(j))
+ let p = Pose2(firstGroundTruth.rot, t)
+ var v = VariableAssignments()
+ let poseId = v.store(p)
+ let startpose = v[poseId]
+ var fg = FactorGraph()
+ // CREATE THE FACTOR AND FACTOR GRAPH
+ let factorRAE = ProbablisticTrackingFactor(poseId,
+ measurement: firstFrame,
+ encoder: rae,
+ patchSize: (40, 70),
+ appearanceModelSize: (40, 70),
+ foregroundModel: foregroundModel,
+ backgroundModel: backgroundModel,
+ maxPossibleNegativity: 1e7
+ )
+ fg.store(factorRAE)
+ values[i,j] = Tensor(factorRAE.errorVector(v[poseId]).x)
+ }
+ print("row", i)
+ }
+ print(values[0...,0])
+ let min_val = values.min()
+ if Double(min_val)! < 0 {
+ values = values-min_val
+ }
+ values = values/values.max()*255
+ print(values[0...,0])
+ print(values.shape)
+ axs[1].imshow(values.makeNumpyArray())
+
+ fig.savefig(folderName + "/vizual_RAE.png", bbox_inches: "tight")
+ }
+ }
+}
\ No newline at end of file
diff --git a/Scripts/Brando15.swift b/Scripts/Brando15.swift
new file mode 100644
index 00000000..59b9ccf9
--- /dev/null
+++ b/Scripts/Brando15.swift
@@ -0,0 +1,91 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+
+
+
+/// Brando15 SAVE PATCHES FOR LATER USE
+struct Brando15: ParsableCommand {
+ typealias LikelihoodModel = TrackingLikelihoodModel
+
+
+ @Flag(help: "Training mode")
+ var training: Bool = false
+
+ let num_boxes: Int = 10000
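+
+  // This script crops `num_boxes` background and `num_boxes` foreground patches
+  // of size 40x70 from the training video, labels them 0/1, and saves the
+  // stacked patch tensors as .npy files for later classifier training.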
+
+ func getTrainingDataBG(
+ from dataset: OISTBeeVideo
+ ) -> (Tensor, Tensor) {
+ print("bg")
+
+ // var allBoxes = [LikelihoodModel.Datum]()
+ let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+ // (frame: $0.frame, type: LikelihoodModel.PatchType.bg, obb: $0.obb)
+ $0.frame!.patch(at: $0.obb)
+ }
+ print("bg2")
+ let labels = Tensor(zeros: [num_boxes])
+ print("labels done bg")
+ let patches = Tensor(stacking: bgBoxes.map {$0})
+ print("patches done bg")
+ return (labels, patches)
+ }
+
+
+ func getTrainingDataFG(
+ from dataset: OISTBeeVideo
+ ) -> (Tensor, Tensor) {
+ print("fg")
+ // var allBoxes = [LikelihoodModel.Datum]()
+ let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+ // (frame: $0.frame, type: LikelihoodModel.PatchType.bg, obb: $0.obb)
+ $0.frame!.patch(at: $0.obb)
+ }
+    print("fg2")
+    let labels = Tensor(ones: [num_boxes])
+    print("labels done fg")
+    let patches = Tensor(stacking: fgBoxes.map {$0})
+    print("patches done fg")
+ return (labels, patches)
+ }
+
+
+
+
+  // Extracts foreground/background patches and saves them to .npy files.
+  // Creates the folder `classifiers/classifiers_today` if it does not exist.
+ func run() {
+ let folderName = "classifiers/classifiers_today"
+ if !FileManager.default.fileExists(atPath: folderName) {
+ do {
+ try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+ } catch {
+ print(error.localizedDescription)
+ }
+ } else {
+ print("folder exists")
+ }
+
+
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+ let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
+ var (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset)
+ let np = Python.import("numpy")
+ np.save("Patches_bg_\(num_boxes).npy", patches_bg.makeNumpyArray())
+ var (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset)
+
+ // var patches = concatenate(patches_bg, patches_fg)
+ var patches = Tensor(stacking: patches_bg.unstacked() + patches_fg.unstacked())
+ var labels = Tensor(concatenate(labels_bg, labels_fg))
+ print("shape of patches", patches.shape)
+ print("shape of labels", labels.shape)
+ np.save("Patches_bg_fg_\(num_boxes).npy", patches.makeNumpyArray())
+ }
+}
diff --git a/Scripts/Brando16.swift b/Scripts/Brando16.swift
new file mode 100644
index 00000000..43343228
--- /dev/null
+++ b/Scripts/Brando16.swift
@@ -0,0 +1,126 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+import PenguinStructures
+
+/// PCA tests
+struct Brando16: ParsableCommand {
+ typealias LikelihoodModel = TrackingLikelihoodModel
+
+ @Option(help: "Run for number of frames")
+ var trackLength: Int = 80
+
+
+ func getTrainingData(
+ from dataset: OISTBeeVideo,
+ numberForeground: Int = 4500
+ ) -> [LikelihoodModel.Datum] {
+ let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
+ (frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb)
+ }
+
+ return fgBoxes
+ }
+
+  // Runs a PPCA tracker for several feature dimensions and outputs the relevant
+  // images and statistics.
+  // Make sure the `Results/ppca_<dim>` folders (with their `sequence<i>` and
+  // `EAO` subfolders) exist before running.
+ func run() {
+ let np = Python.import("numpy")
+ let pickle = Python.import("pickle")
+
+ let (imageHeight, imageWidth, imageChannels) =
+ (40, 70, 1)
+
+
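+    // Despite the name, these are the PCA feature (latent) dimensions to sweep;
+    // each iteration trains and evaluates a full probabilistic tracker.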
+    let kHiddenDimension = [16, 64, 256]
+ for dim in kHiddenDimension {
+ let dataDir = URL(fileURLWithPath: "./OIST_Data")
+
+ let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
+
+ let trainingData = Tensor(stacking: getTrainingData(from: trainingDataset).map { $0.frame!.patch(at: $0.obb) })
+ let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: trackLength)!
+
+
+ var statistics = FrameStatistics(Tensor(0.0))
+ statistics.mean = Tensor(62.26806976644069)
+ statistics.standardDeviation = Tensor(37.44683834503672)
+ let trainingBatch = trainingDataset.makeBatch(statistics: statistics, appearanceModelSize: (imageHeight, imageWidth), batchSize: 4500)
+ let rae = PCAEncoder(from: trainingBatch, given: dim)
+
+ let trackerEvaluation = TrackerEvaluationDataset(testData)
+ print("s1")
+ let evalTracker: Tracker = {frames, start in
+ var tracker = trainProbabilisticTracker(
+ trainingData: trainingDataset,
+ encoder: rae,
+ frames: frames,
+ boundingBoxSize: (40, 70),
+ withFeatureSize: dim,
+ fgRandomFrameCount: 100,
+ bgRandomFrameCount: 100
+ )
+ let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true)
+ let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols:70) }
+
+ return track
+ }
+ let plt = Python.import("matplotlib.pyplot")
+ let sequenceCount = 19
+ var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01")
+
+ for (index, value) in results.sequences.prefix(sequenceCount).enumerated() {
+ var i: Int = 0
+ zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map {
+ let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
+ fig.savefig("Results/ppca_\(dim)/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight")
+ plt.close("all")
+ i = i + 1
+ }
+
+
+ let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
+ fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).")
+
+ value.subsequences.map {
+ let encoder = JSONEncoder()
+ let data = try! encoder.encode($0.prediction)
+ FileManager.default.createFile(atPath: "Results/ppca_\(dim)/prediction_ppca_\(dim)_sequence_\(index).json", contents: data, attributes: nil)
+ plotPoseDifference(
+ track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0]
+ )
+ }
+ plotOverlap(
+ metrics: value.subsequences.first!.metrics, on: axes[1]
+ )
+ fig.savefig("Results/ppca_\(dim)/andrew01_subsequence\(index).png", bbox_inches: "tight")
+ print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)")
+ }
+
+ print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)")
+ let f = Python.open("Results/ppca_\(dim)/EAO/rp_\(dim).data", "wb")
+ pickle.dump(results.expectedAverageOverlap.curve, f)
+
+
+ }
+
+ }
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+ return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+ return (t.head)
+}
\ No newline at end of file
diff --git a/Scripts/Fan03.swift b/Scripts/Fan03.swift
index d153b95b..ce2c1aa8 100644
--- a/Scripts/Fan03.swift
+++ b/Scripts/Fan03.swift
@@ -27,7 +27,7 @@ struct Fan03: ParsableCommand {
let rp = RandomProjection(fromShape: TensorShape([imageHeight, imageWidth, imageChannels]), toFeatureSize: featureSize)
- let (fig, _, _) = runProbabilisticTracker(
+ let (fig, track, gt) = runProbabilisticTracker(
directory: dataDir,
encoder: rp,
onTrack: trackId, forFrames: trackLength, withSampling: true,
@@ -37,5 +37,17 @@ struct Fan03: ParsableCommand {
/// Actual track v.s. ground truth track
fig.savefig("Results/fan03/fan03_track\(trackId)_\(featureSize).pdf", bbox_inches: "tight")
+
+
+ let json = JSONEncoder()
+ json.outputFormatting = .prettyPrinted
+ let track_data = try! json.encode(track)
+ try! track_data.write(to: URL(fileURLWithPath: "Results/fan04/fan04_track_\(trackId)_\(featureSize).json"))
+
+ let gt_data = try! json.encode(gt)
+ try! gt_data.write(to: URL(fileURLWithPath: "Results/fan04/fan04_gt_\(trackId)_\(featureSize).json"))
+
+
+
}
}
diff --git a/Scripts/Fan05.swift b/Scripts/Fan05.swift
index ff43e4c2..1082d207 100644
--- a/Scripts/Fan05.swift
+++ b/Scripts/Fan05.swift
@@ -31,7 +31,6 @@ struct Fan05: ParsableCommand {
let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
(frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb)
}
-
return fgBoxes + bgBoxes
}
diff --git a/Scripts/Fan12.swift b/Scripts/Fan12.swift
index cf4649b7..ecaaca94 100644
--- a/Scripts/Fan12.swift
+++ b/Scripts/Fan12.swift
@@ -31,7 +31,7 @@ struct Fan12: ParsableCommand {
// Just runs an RP tracker and saves image to file
// Make sure you have a folder `Results/fan12` before running
func run() {
- let kHiddenDimension = 100
+ let kHiddenDimension = 512
let dataDir = URL(fileURLWithPath: "./OIST_Data")
let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
diff --git a/Scripts/main.swift b/Scripts/main.swift
index 9b96f9c1..c8337fea 100644
--- a/Scripts/main.swift
+++ b/Scripts/main.swift
@@ -17,7 +17,11 @@ import PenguinParallelWithFoundation
struct Scripts: ParsableCommand {
static var configuration = CommandConfiguration(
- subcommands: [Andrew01.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self,
+ subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self,
+ Brando05.self, Brando06.self, Brando07.self, Brando08.self,
+ Brando10.self, Brando11.self, Brando12.self, Brando13.self, Brando14.self, Brando15.self, Brando16.self, Andrew01.self,
+ Andrew05.self, Andrew06.self, Andrew07.self, Andrew08.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self,
+ Fan10.self, Fan12.self, Fan13.self, Fan14.self,
Frank01.self, Frank02.self, Frank03.self, Frank04.self])
}
diff --git a/Sources/BeeDataset/Visualization.swift b/Sources/BeeDataset/Visualization.swift
index 6558c08b..78975fb9 100644
--- a/Sources/BeeDataset/Visualization.swift
+++ b/Sources/BeeDataset/Visualization.swift
@@ -1,76 +1,76 @@
-// Copyright 2020 The SwiftFusion Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// // Copyright 2020 The SwiftFusion Authors. All Rights Reserved.
+// //
+// // Licensed under the Apache License, Version 2.0 (the "License");
+// // you may not use this file except in compliance with the License.
+// // You may obtain a copy of the License at
+// //
+// // http://www.apache.org/licenses/LICENSE-2.0
+// //
+// // Unless required by applicable law or agreed to in writing, software
+// // distributed under the License is distributed on an "AS IS" BASIS,
+// // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// // See the License for the specific language governing permissions and
+// // limitations under the License.
-import SwiftFusion
-import TensorFlow
-import Plotly
-import ModelSupport
-import Foundation
+// import SwiftFusion
+// import TensorFlow
+// // import Plotly
+// import ModelSupport
+// import Foundation
-/// Creates a Plotly figure that displays `frame`, with optional `boxes` overlaid on
-/// them.
-public func plot(
- _ frame: Tensor, boxes: [(name: String, OrientedBoundingBox)] = [],
- margin: Double = 30, scale: Double = 1
-) -> Plotly.Figure {
- let rows = Double(frame.shape[0])
- let cols = Double(frame.shape[1])
+// /// Creates a Plotly figure that displays `frame`, with optional `boxes` overlaid on
+// /// them.
+// public func plot(
+// _ frame: Tensor, boxes: [(name: String, OrientedBoundingBox)] = [],
+// margin: Double = 30, scale: Double = 1
+// ) -> Plotly.Figure {
+// let rows = Double(frame.shape[0])
+// let cols = Double(frame.shape[1])
- // Axis settings:
- // - no grid
- // - range is the image size
- // - scale is anchored, to preserve image aspect ratio
- // - y axis reversed so that everything is in "(u, v)" coordinates
- let xAx = Layout.XAxis(range: [0, InfoArray(cols)], showGrid: false)
- let yAx = Layout.YAxis(
- autoRange: .reversed, range: [0, InfoArray(rows)], scaleAnchor: .xAxis(xAx), showGrid: false)
+// // Axis settings:
+// // - no grid
+// // - range is the image size
+// // - scale is anchored, to preserve image aspect ratio
+// // - y axis reversed so that everything is in "(u, v)" coordinates
+// let xAx = Layout.XAxis(range: [0, InfoArray(cols)], showGrid: false)
+// let yAx = Layout.YAxis(
+// autoRange: .reversed, range: [0, InfoArray(rows)], scaleAnchor: .xAxis(xAx), showGrid: false)
- let tmpPath = URL(fileURLWithPath: "tmpForPlotlyDisplay.png")
- ModelSupport.Image(Tensor(frame)).save(to: tmpPath)
- let imageData = try! "data:image/png;base64," + Data(contentsOf: tmpPath).base64EncodedString()
+// let tmpPath = URL(fileURLWithPath: "tmpForPlotlyDisplay.png")
+// ModelSupport.Image(Tensor(frame)).save(to: tmpPath)
+// let imageData = try! "data:image/png;base64," + Data(contentsOf: tmpPath).base64EncodedString()
- return Figure(
- data: [
- // Dummy data because Plotly is confused when there is no data.
- Scatter(
- x: [0, cols], y: [0, rows],
- mode: .markers, marker: Shared.GradientMarker(opacity: 0),
- xAxis: xAx, yAxis: yAx
- )
- ] + boxes.map { box in
- Scatter(
- name: box.name,
- x: box.1.corners.map { $0.x },
- y: box.1.corners.map { $0.y },
- xAxis: xAx,
- yAxis: yAx
- )
- },
- layout: Layout(
- width: cols * scale + 2 * margin,
- height: rows * scale + 2 * margin,
- margin: Layout.Margin(l: margin, r: margin, t: margin, b: margin),
- images: [
- Layout.Image(
- visible: true,
- source: imageData,
- layer: .below,
- xSize: cols, ySize: rows,
- sizing: .stretch,
- x: 0, y: 0, xReference: .xAxis(xAx), yReference: .yAxis(yAx)
- )
- ]
- )
- )
-}
\ No newline at end of file
+// return Figure(
+// data: [
+// // Dummy data because Plotly is confused when there is no data.
+// Scatter(
+// x: [0, cols], y: [0, rows],
+// mode: .markers, marker: Shared.GradientMarker(opacity: 0),
+// xAxis: xAx, yAxis: yAx
+// )
+// ] + boxes.map { box in
+// Scatter(
+// name: box.name,
+// x: box.1.corners.map { $0.x },
+// y: box.1.corners.map { $0.y },
+// xAxis: xAx,
+// yAxis: yAx
+// )
+// },
+// layout: Layout(
+// width: cols * scale + 2 * margin,
+// height: rows * scale + 2 * margin,
+// margin: Layout.Margin(l: margin, r: margin, t: margin, b: margin),
+// images: [
+// Layout.Image(
+// visible: true,
+// source: imageData,
+// layer: .below,
+// xSize: cols, ySize: rows,
+// sizing: .stretch,
+// x: 0, y: 0, xReference: .xAxis(xAx), yReference: .yAxis(yAx)
+// )
+// ]
+// )
+// )
+// }
\ No newline at end of file
diff --git a/Sources/BeeTracking/AppearanceRAE+Serialization.swift b/Sources/BeeTracking/AppearanceRAE+Serialization.swift
index 6c0d5f05..dc3116d6 100644
--- a/Sources/BeeTracking/AppearanceRAE+Serialization.swift
+++ b/Sources/BeeTracking/AppearanceRAE+Serialization.swift
@@ -71,4 +71,26 @@ extension DenseRAE {
self.decoder_conv1.numpyWeights
].reduce([], +)
}
+
}
+
+
+extension NNClassifier {
+ /// Loads model weights from the numpy arrays in `weights`.
+ public mutating func load(weights: PythonObject) {
+ self.encoder_conv1.load(weights: weights[0..<2])
+ self.encoder1.load(weights: weights[2..<4])
+ self.encoder2.load(weights: weights[4..<6])
+ self.encoder3.load(weights: weights[6..<8])
+ }
+
+ /// The model weights as numpy arrays.
+ public var numpyWeights: PythonObject {
+ [
+ self.encoder_conv1.numpyWeights,
+ self.encoder1.numpyWeights,
+ self.encoder2.numpyWeights,
+ self.encoder3.numpyWeights
+ ].reduce([], +)
+ }
+}
\ No newline at end of file
diff --git a/Sources/BeeTracking/BigTransfer.swift b/Sources/BeeTracking/BigTransfer.swift
new file mode 100644
index 00000000..d1f57b56
--- /dev/null
+++ b/Sources/BeeTracking/BigTransfer.swift
@@ -0,0 +1,482 @@
+// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Original source:
+// "Big Transfer (BiT): General Visual Representation Learning"
+// Alexander Kolesnikov, Lucas Beyer, Xiaohua Zhai, Joan Puigcerver, Jessica Yung, Sylvain Gelly, Neil Houlsby.
+// https://arxiv.org/abs/1912.11370
+
+import Foundation
+import TensorFlow
+import PythonKit
+import BeeDataset
+import SwiftFusion
+
+let subprocess = Python.import("subprocess")
+let np = Python.import("numpy")
+
+
+/// Convenient layer wrapper used to load all of the trained layers from the .npz file downloaded from the
+/// BigTransfer weights repository
+struct BigTransferNamedLayer {
+ let name: String
+ var layer: Tensor
+}
+
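+/// Picks the BiT-HyperRule input resolution: (160, 128) for source images
+/// smaller than 96x96, and (512, 480) otherwise.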
+func getResolution(originalResolution: (Int, Int)) -> (Int, Int) {
+  let area = originalResolution.0 * originalResolution.1
+  return area < 96*96 ? (160, 128) : (512, 480)
+}
+
+/// Get the necessary padding to maintain the network size specified in the BigTransfer architecture
+///
+/// - Parameters:
+/// - kernelSize: size n which represents the height and width of the nxn kernel
+/// - Returns: the left / top padding and the right / bottom padding necessary to maintain correct output sizes
+/// after convolution
+func paddingFromKernelSize(kernelSize: Int) -> [(before: Int, after: Int)] {
+ let padTotal = kernelSize - 1
+ let padBeginning = Int(padTotal / 2)
+ let padEnd = padTotal - padBeginning
+ let padding = [
+ (before: 0, after: 0),
+ (before: padBeginning, after: padEnd),
+ (before: padBeginning, after: padEnd),
+ (before: 0, after: 0)]
+ return padding
+}
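+// For example, paddingFromKernelSize(kernelSize: 3) returns
+// [(0, 0), (1, 1), (1, 1), (0, 0)]: one pixel of padding on each spatial side
+// and none on the batch and channel axes.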
+
+/// Get all of the pre-trained layers from the .npz file into a Swift array to load into the BigTransfer model
+///
+/// - Parameters:
+/// - modelName: model name that represents the weights to load from the BigTransfer weights repository
+/// ("BiT-M-R50x1" for example)
+/// - Returns: an array of layers and their associated name in the .npz file downloaded from the weights repository
+func getPretrainedWeightsDict(modelName: String) -> [BigTransferNamedLayer] {
+ let validTypes = ["BiT-S", "BiT-M"]
+ let validSizes = [(50, 1), (50, 3), (101, 1), (101, 3), (152, 4)]
+ let bitURL = "https://storage.googleapis.com/bit_models/"
+ var knownModels = [String: String]()
+
+ for types in validTypes {
+ for sizes in validSizes {
+ let modelString = types + "-R" + String(sizes.0) + "x" + String(sizes.1)
+ knownModels[modelString] = bitURL + modelString + ".npz"
+ }
+ }
+
+ if let modelPath = knownModels[modelName] {
+ subprocess.call("wget " + modelPath + " .", shell: true)
+ }
+
+ let weights = np.load("./" + modelName + ".npz")
+
+  var weightsArray = [BigTransferNamedLayer]()
+ for param in weights {
+ weightsArray.append(BigTransferNamedLayer(name: String(param)!, layer: Tensor(numpy: weights[param])!))
+ }
+ return weightsArray
+}
+
+/// A 2D Convolution layer that standardizes the weights before the forward pass. This has been implemented in
+/// accordance with the implementation in https://github.com/google-research/big_transfer/blob/49afe42338b62af9fbe18f0258197a33ee578a6b/bit_pytorch/models.py#L25
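+///
+/// Weight standardization normalizes each filter before the convolution:
+/// W_hat = (W - mean(W)) / sqrt(var(W) + 1e-16), with statistics taken over the
+/// spatial and input-channel axes.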
+public struct StandardizedConv2D: Layer {
+ public var conv: Conv2D
+
+ public init(
+ filterShape: (Int, Int, Int, Int),
+ strides: (Int, Int) = (1, 1),
+ padding: Padding = .valid,
+ useBias: Bool = true
+ )
+ {
+ self.conv = Conv2D(
+ filterShape: filterShape,
+ strides: strides,
+ padding: padding,
+ useBias: useBias)
+ }
+
+ @differentiable
+ public func callAsFunction(_ input: Tensor) -> Tensor {
+ let axes: Array = [0, 1, 2]
+ var standardizedConv = conv
+ standardizedConv.filter = (standardizedConv.filter - standardizedConv.filter.mean(squeezingAxes: axes)) / sqrt((standardizedConv.filter.variance(squeezingAxes: axes) + 1e-16))
+ return standardizedConv(input)
+ }
+
+}
+
+/// A standardized convolution and group norm layer as specified in the BigTransfer architecture
+public struct ConvGNV2BiT: Layer {
+ public var conv: StandardizedConv2D
+ public var norm: GroupNorm
+ @noDerivative public var isSecond: Bool
+
+ public init(
+ inFilters: Int,
+ outFilters: Int,
+ kernelSize: Int = 1,
+ stride: Int = 1,
+ padding: Padding = .valid,
+ isSecond: Bool = false
+ ) {
+ self.conv = StandardizedConv2D(
+ filterShape: (kernelSize, kernelSize, inFilters, outFilters),
+ strides: (stride, stride),
+ padding: padding,
+ useBias: false)
+ self.norm = GroupNorm(
+ offset: Tensor(zeros: [inFilters]),
+ scale: Tensor(zeros: [inFilters]),
+ groupCount: 2,
+ axis: -1,
+ epsilon: 0.001)
+ self.isSecond = isSecond
+ }
+
+ @differentiable
+ public func callAsFunction(_ input: Tensor) -> Tensor {
+ var normResult = norm(input)
+ if self.isSecond {
+ normResult = normResult.padded(forSizes: paddingFromKernelSize(kernelSize: 3))
+ }
+ let reluResult = relu(normResult)
+ let convResult = conv(reluResult)
+ return convResult
+ }
+}
+
+/// The shortcut in a residual block with standardized convolution and group normalization
+public struct ShortcutBiT: Layer {
+ public var projection: StandardizedConv2D
+ public var norm: GroupNorm
+ @noDerivative public let needsProjection: Bool
+
+ public init(inFilters: Int, outFilters: Int, stride: Int) {
+ needsProjection = (stride > 1 || inFilters != outFilters)
+ norm = GroupNorm(
+ offset: Tensor(zeros: [needsProjection ? inFilters : 1]),
+ scale: Tensor(zeros: [needsProjection ? inFilters : 1]),
+ groupCount: needsProjection ? 2 : 1,
+ axis: -1,
+ epsilon: 0.001)
+
+ projection = StandardizedConv2D(
+ filterShape: (1, 1, needsProjection ? inFilters : 1, needsProjection ? outFilters : 1),
+ strides: (stride, stride),
+ padding: .valid,
+ useBias: false)
+ }
+
+ @differentiable
+ public func callAsFunction(_ input: Tensor) -> Tensor {
+ var res = input
+ if needsProjection {
+ res = norm(res)
+ res = relu(res)
+ res = projection(res)
+ }
+ return res
+ }
+}
+
+/// Residual block for BigTransfer with standardized convolution and group normalization layers
+public struct ResidualBlockBiT: Layer {
+ public var shortcut: ShortcutBiT
+ public var convs: [ConvGNV2BiT]
+
+ public init(inFilters: Int, outFilters: Int, stride: Int, expansion: Int){
+ if expansion == 1 {
+ convs = [
+ ConvGNV2BiT(inFilters: inFilters, outFilters: outFilters, kernelSize: 3, stride: stride),
+ ConvGNV2BiT(inFilters: outFilters, outFilters: outFilters, kernelSize: 3, isSecond: true)
+ ]
+ } else {
+ convs = [
+ ConvGNV2BiT(inFilters: inFilters, outFilters: outFilters/4),
+ ConvGNV2BiT(inFilters: outFilters/4, outFilters: outFilters/4, kernelSize: 3, stride: stride, isSecond: true),
+ ConvGNV2BiT(inFilters: outFilters/4, outFilters: outFilters)
+ ]
+ }
+ shortcut = ShortcutBiT(inFilters: inFilters, outFilters: outFilters, stride: stride)
+ }
+
+ @differentiable
+ public func callAsFunction(_ input: Tensor) -> Tensor {
+ let convResult = convs.differentiableReduce(input) { $1($0) }
+ return convResult + shortcut(input)
+ }
+}
+
+/// An implementation of the BigTransfer architecture with variable sizes
+public struct BigTransfer: Layer {
+ public var inputStem: StandardizedConv2D
+ public var maxPool: MaxPool2D
+ public var residualBlocks: [ResidualBlockBiT] = []
+ public var groupNorm : GroupNorm
+ public var flatten = Flatten()
+ public var classifier: Dense
+ public var avgPool = GlobalAvgPool2D()
+ @noDerivative public var finalOutFilter : Int = 0
+
+ /// Initialize the BigTransfer Model
+ ///
+ /// - Parameters:
+ /// - classCount: the number of output classes
+  ///   - depth: the specified depth of the network based on the various ResNet architectures
+ /// - inputChannels: the number of input channels for the dataset
+ /// - stemFilters: the number of filters in the first three convolutions
+ public init(
+ classCount: Int,
+ depth: Depth,
+ inputChannels: Int = 3,
+ modelName: String = "BiT-M-R50x1",
+ loadWeights: Bool = true
+ ) {
+
+ self.inputStem = StandardizedConv2D(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .valid, useBias: false)
+ self.maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2), padding: .valid)
+ let sizes = [64 / depth.expansion, 64, 128, 256, 512]
+ for (iBlock, nBlocks) in depth.layerBlockSizes.enumerated() {
+ let (nIn, nOut) = (sizes[iBlock] * depth.expansion, sizes[iBlock+1] * depth.expansion)
+      for j in 0..<nBlocks {
+        // NOTE: the residual-block construction was garbled in this hunk; this
+        // reconstruction follows the usual BiT/ResNet-v2 pattern of striding on
+        // the first block of every group after the first.
+        let stride = (iBlock != 0) && (j == 0) ? 2 : 1
+        residualBlocks.append(ResidualBlockBiT(
+          inFilters: j == 0 ? nIn : nOut, outFilters: nOut, stride: stride,
+          expansion: depth.expansion))
+      }
+    }
+    self.finalOutFilter = 512 * depth.expansion
+    self.groupNorm = GroupNorm(
+ offset: Tensor(zeros: [self.finalOutFilter]),
+ scale: Tensor(zeros: [self.finalOutFilter]),
+ groupCount: 2,
+ axis: -1,
+ epsilon: 0.001)
+ self.classifier = Dense(inputSize: 512 * depth.expansion, outputSize: classCount)
+
+ if loadWeights {
+ let weightsArray = getPretrainedWeightsDict(modelName: modelName)
+
+ // Load weights from model .npz file into the BigTransfer model
+ let convs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && !(key.name.contains("proj"))}
+
+ var k = 0
+ for (idx, i) in self.residualBlocks.enumerated() {
+ for (jdx, _) in i.convs.enumerated() {
+ assert(self.residualBlocks[idx].convs[jdx].conv.conv.filter.shape == convs[k].layer.shape)
+ self.residualBlocks[idx].convs[jdx].conv.conv.filter = convs[k].layer
+ k = k + 1
+ }
+ }
+
+ let projectiveConvs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && (key.name.contains("proj"))}
+ var normScale = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/gamma")}
+ var normOffset = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/beta")}
+
+ k = 0
+ for (idx, i) in self.residualBlocks.enumerated() {
+ if (i.shortcut.projection.conv.filter.shape != [1, 1, 1, 1])
+ {
+ assert(self.residualBlocks[idx].shortcut.projection.conv.filter.shape == projectiveConvs[k].layer.shape)
+ self.residualBlocks[idx].shortcut.projection.conv.filter = projectiveConvs[k].layer
+
+ assert(self.residualBlocks[idx].shortcut.norm.scale.shape == normScale[k].layer.shape)
+ self.residualBlocks[idx].shortcut.norm.scale = normScale[k].layer
+
+ assert(self.residualBlocks[idx].shortcut.norm.offset.shape == normOffset[k].layer.shape)
+ self.residualBlocks[idx].shortcut.norm.offset = normOffset[k].layer
+ k = k + 1
+ }
+ }
+
+ normScale = weightsArray.filter {key in return key.name.contains("gamma")}
+ k = 0
+ for (idx, i) in self.residualBlocks.enumerated() {
+ for (jdx, _) in i.convs.enumerated() {
+ assert(normScale[k].layer.shape == self.residualBlocks[idx].convs[jdx].norm.scale.shape)
+ self.residualBlocks[idx].convs[jdx].norm.scale = normScale[k].layer
+ k = k + 1
+ }
+ }
+
+ normOffset = weightsArray.filter {key in return key.name.contains("beta")}
+
+ var l = 0
+ for (idx, i) in self.residualBlocks.enumerated() {
+ for (jdx, _) in i.convs.enumerated() {
+ assert(normOffset[l].layer.shape == self.residualBlocks[idx].convs[jdx].norm.offset.shape)
+ self.residualBlocks[idx].convs[jdx].norm.offset = normOffset[l].layer
+ l = l + 1
+ }
+ }
+
+ assert(self.groupNorm.scale.shape == normScale[k].layer.shape)
+ self.groupNorm.scale = normScale[k].layer
+ assert(self.groupNorm.offset.shape == normOffset[l].layer.shape)
+ self.groupNorm.offset = normOffset[l].layer
+
+ let rootConvs = weightsArray.filter {key in return key.name.contains("root_block")}
+ assert(self.inputStem.conv.filter.shape == rootConvs[0].layer.shape)
+ self.inputStem.conv.filter = rootConvs[0].layer
+ }
+ }
+
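+  /// Classifies a batch of grayscale patches by replicating the single channel
+  /// into RGB and resizing to the BiT-HyperRule resolution before the forward
+  /// pass.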
+ @differentiable(wrt: imageBatch)
+ public func classify(_ imageBatch: Tensor) -> Tensor {
+    let grayscale = Tensor(imageBatch.unstacked(alongAxis: 3)[0])
+    let rgbImages = Tensor(stacking: [grayscale, grayscale, grayscale], alongAxis: 3)
+
+    let resizeSize = getResolution(originalResolution: (40, 70))
+    let resized = resize(images: rgbImages, size: (resizeSize.0, resizeSize.1))
+ return Tensor(callAsFunction(resized))
+ }
+
+ @differentiable
+ public func callAsFunction(_ input: Tensor) -> Tensor {
+ var paddedInput = input.padded(forSizes: paddingFromKernelSize(kernelSize: 7))
+ paddedInput = inputStem(paddedInput).padded(forSizes: paddingFromKernelSize(kernelSize: 3))
+ let inputLayer = maxPool(paddedInput)
+ let blocksReduced = residualBlocks.differentiableReduce(inputLayer) { $1($0) }
+ let normalized = relu(groupNorm(blocksReduced))
+ return normalized.sequenced(through: avgPool, flatten, classifier)
+ }
+
+ public func save(new_path: String = "new_weights", path: String = "BiT-M-R50x1") {
+ var weightsArray = getPretrainedWeightsDict(modelName: path)
+
+ // Load weights from model .npz file into the BigTransfer model
+ var convs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && !(key.name.contains("proj"))}
+ var k = 0
+ for (idx, i) in self.residualBlocks.enumerated() {
+ for (jdx, _) in i.convs.enumerated() {
+ assert(self.residualBlocks[idx].convs[jdx].conv.conv.filter.shape == convs[k].layer.shape)
+ convs[k].layer = self.residualBlocks[idx].convs[jdx].conv.conv.filter
+ k = k + 1
+ }
+ }
+
+ var projectiveConvs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && (key.name.contains("proj"))}
+ var normScale = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/gamma")}
+ var normOffset = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/beta")}
+
+ k = 0
+ for (idx, i) in self.residualBlocks.enumerated() {
+ if (i.shortcut.projection.conv.filter.shape != [1, 1, 1, 1])
+ {
+ assert(self.residualBlocks[idx].shortcut.projection.conv.filter.shape == projectiveConvs[k].layer.shape)
+ projectiveConvs[k].layer = self.residualBlocks[idx].shortcut.projection.conv.filter
+ assert(self.residualBlocks[idx].shortcut.norm.scale.shape == normScale[k].layer.shape)
+ normScale[k].layer = self.residualBlocks[idx].shortcut.norm.scale
+
+ assert(self.residualBlocks[idx].shortcut.norm.offset.shape == normOffset[k].layer.shape)
+ normOffset[k].layer = self.residualBlocks[idx].shortcut.norm.offset
+ k = k + 1
+ }
+ }
+
+ var gammaNormScale = weightsArray.filter {key in return key.name.contains("gamma")}
+ k = 0
+ for (idx, i) in self.residualBlocks.enumerated() {
+ for (jdx, _) in i.convs.enumerated() {
+ assert(gammaNormScale[k].layer.shape == self.residualBlocks[idx].convs[jdx].norm.scale.shape)
+ gammaNormScale[k].layer = self.residualBlocks[idx].convs[jdx].norm.scale
+ k = k + 1
+ }
+ }
+
+ var betaNormOffset = weightsArray.filter {key in return key.name.contains("beta")}
+
+ var l = 0
+ for (idx, i) in self.residualBlocks.enumerated() {
+ for (jdx, _) in i.convs.enumerated() {
+ assert(betaNormOffset[l].layer.shape == self.residualBlocks[idx].convs[jdx].norm.offset.shape)
+ betaNormOffset[l].layer = self.residualBlocks[idx].convs[jdx].norm.offset
+ l = l + 1
+ }
+ }
+
+ assert(self.groupNorm.scale.shape == gammaNormScale[k].layer.shape)
+ gammaNormScale[k].layer = self.groupNorm.scale
+ assert(self.groupNorm.offset.shape == betaNormOffset[l].layer.shape)
+ betaNormOffset[l].layer = self.groupNorm.offset
+
+ var rootConvs = weightsArray.filter {key in return key.name.contains("root_block")}
+ assert(self.inputStem.conv.filter.shape == rootConvs[0].layer.shape)
+ rootConvs[0].layer = self.inputStem.conv.filter
+
+ var newWeights = convs + projectiveConvs + normScale + normOffset + gammaNormScale + betaNormOffset
+
+ var weightDict: [String: Tensor] = [rootConvs[0].name: rootConvs[0].layer]
+
+    for weight in newWeights {
+ weightDict[weight.name] = weight.layer
+ }
+
+ let np = Python.import("numpy")
+ let zipfile = Python.import("zipfile")
+ let format = Python.import("numpy.lib.format")
+ let compat = Python.import("numpy.compat")
+ var file = compat.os_fspath(new_path + ".npz")
+ var zipf = zipfile.ZipFile(file, mode: "w", compression: zipfile.ZIP_STORED, allowZip64: true)
+
+    for weight in weightsArray {
+      let fname = weight.name + ".npy"
+      // Write the updated weight when one exists; otherwise fall back to the
+      // original pretrained value.
+      var val = np.asanyarray(weight.layer.makeNumpyArray())
+      if let updated = weightDict[weight.name] {
+        val = np.asanyarray(updated.makeNumpyArray())
+      }
+      let file_id = zipf.open(fname, "w", force_zip64: true)
+      format.write_array(file_id, val, allow_pickle: true)
+    }
+
+ }
+}
+
+
+
+extension BigTransfer {
+ public enum Depth {
+ case resNet18
+ case resNet34
+ case resNet50
+ case resNet101
+ case resNet152
+
+ var expansion: Int {
+ switch self {
+ case .resNet18, .resNet34: return 1
+ default: return 4
+ }
+ }
+
+ var layerBlockSizes: [Int] {
+ switch self {
+ case .resNet18: return [2, 2, 2, 2]
+ case .resNet34: return [3, 4, 6, 3]
+ case .resNet50: return [3, 4, 6, 3]
+ case .resNet101: return [3, 4, 23, 3]
+ case .resNet152: return [3, 8, 36, 3]
+ }
+ }
+ }
+}
+
+extension BigTransfer: Classifier {}
\ No newline at end of file
diff --git a/Sources/BeeTracking/NNClassifier.swift b/Sources/BeeTracking/NNClassifier.swift
new file mode 100644
index 00000000..cf49f0b7
--- /dev/null
+++ b/Sources/BeeTracking/NNClassifier.swift
@@ -0,0 +1,607 @@
+// Copyright 2020 The SwiftFusion Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import SwiftFusion
+import TensorFlow
+import PythonKit
+import BeeDataset
+
+// MARK: - The neural network patch classifier
+
+/// A labeled batch of bee/background patches used to train the classifier.
+public struct BeeBatch {
+ let patch: Tensor
+ let label: Tensor
+}
+/// Conform `BeeBatch` to `Collatable` so that we can load it into a `TrainingEpochs`.
+extension BeeBatch: Collatable {
+  public init<BatchSamples: Collection>(collating samples: BatchSamples)
+  where BatchSamples.Element == Self {
+ patch = Tensor(stacking: samples.map{$0.patch})
+ label = Tensor(stacking: samples.map{$0.label})
+ }
+}
+
+
+/// A convolutional patch classifier whose encoder mirrors the RAE of [1]; it
+/// scores 40x70 patches as foreground (bee) or background.
+///
+/// [1] https://openreview.net/forum?id=S1g7tpEYDS
+public struct NNClassifier: Layer {
+ /// The height of the input image in pixels.
+ @noDerivative public let imageHeight: Int
+
+ /// The width of the input image in pixels.
+ @noDerivative public let imageWidth: Int
+
+ /// The number of channels in the input image.
+ @noDerivative public let imageChannels: Int
+
+ /// The number of activations in the hidden layer.
+ @noDerivative public let hiddenDimension: Int
+
+ /// The number of activations in the appearance code.
+ @noDerivative public let latentDimension: Int
+
+ /// First conv to downside the image
+ public var encoder_conv1: Conv2D
+
+ /// Max pooling of factor 2
+ var encoder_pool1: MaxPool2D
+
+ /// First FCN encoding layer goes from image to hidden dimension
+ public var encoder1: Dense
+
+ /// Second goes from dense features to latent code
+ public var encoder2: Dense
+
+ /// Third goes from latent to 1
+ public var encoder3: Dense
+
+ // /// Decode from latent to dense hidden layer with same dimsnions as before
+ // var decoder1: Dense
+
+ // /// Finally, reconstruct grayscale (or RGB) image
+ // var decoder2: Dense
+
+ // var decoder_upsample1: UpSampling2D
+
+ // var decoder_conv1: Conv2D
+
+ /// Creates an instance for images with size `[imageHeight, imageWidth, imageChannels]`, with
+ /// hidden and latent dimensions given by `hiddenDimension` and `latentDimension`.
+ public init(
+ imageHeight: Int, imageWidth: Int, imageChannels: Int,
+ hiddenDimension: Int, latentDimension: Int
+ ) {
+ self.imageHeight = imageHeight
+ self.imageWidth = imageWidth
+ self.imageChannels = imageChannels
+ self.hiddenDimension = hiddenDimension
+ self.latentDimension = latentDimension
+
+ encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu)
+
+ encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same)
+
+ encoder1 = Dense(
+ inputSize: imageHeight * imageWidth * imageChannels / 4,
+ outputSize: hiddenDimension,
+ activation: relu)
+
+ encoder2 = Dense(
+ inputSize: hiddenDimension,
+ outputSize: latentDimension,
+ activation: relu)
+
+ encoder3 = Dense(
+ inputSize: latentDimension,
+ outputSize: 2)
+
+ }
+
+  /// Hyperparameters for the batch-training initializer below.
+ public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String, learningRate: Float)
+ // public init(from imageBatch: Tensor, given parameters: HyperParameters? = nil) {
+  public init(patches: Tensor, labels: Tensor, given parameters: HyperParameters? = nil, train_mode: String) {
+ print("init from image batch")
+ let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1)
+ let h = parameters!.hiddenDimension
+ let d = parameters!.latentDimension
+ var model = NNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_,
+ hiddenDimension: h, latentDimension: d)
+ if train_mode == "pretrained" {
+ print("PRETRAINED")
+ let np = Python.import("numpy")
+ print("loading pretrained weights")
+ model.load(weights: np.load(parameters!.weightFile, allow_pickle: true))
+ }
+
+
+
+ let optimizer = Adam(for: model)
+ optimizer.learningRate = parameters!.learningRate
+
+ let lossFunc = NNClassifierLoss()
+    // Implementation notes: `TrainingEpochs` shuffles the sample order, and an
+    // earlier `map`-based batching approach proved too slow during training.
+
+ // Thread-local variable that model layers read to know their mode
+ Context.local.learningPhase = .training
+
+ let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)})
+
+    let epochs = TrainingEpochs(samples: trainingData, batchSize: 200)  // an infinite sequence of shuffled epochs
+ var trainLossResults: [Double] = []
+ let epochCount = 100
+ for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() {
+ var epochLoss: Double = 0
+ var batchCount: Int = 0
+ for batchSamples in epoch {
+ let batch = batchSamples.collated
+ let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) }
+ optimizer.update(&model, along: grad)
+ epochLoss += loss.scalarized()
+ batchCount += 1
+ }
+ epochLoss /= Double(batchCount)
+ trainLossResults.append(epochLoss)
+ if epochIndex % 5 == 0 {
+ print("\nEpoch \(epochIndex):", terminator:"")
+ }
+ print(" \(epochLoss),", terminator: "")
+ }
+
+ self = model
+ }
+
+  /// Differentiable classifier: maps an image batch to class logits.
+ @differentiable(wrt: imageBatch)
+ public func classify(_ imageBatch: Tensor) -> Tensor {
+ let batchSize = imageBatch.shape[0]
+ let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels]
+ precondition(
+ imageBatch.shape == expectedShape,
+ "input shape is \(imageBatch.shape), but expected \(expectedShape)")
+ return imageBatch
+ .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4])
+ .sequenced(through: encoder1, encoder2, encoder3)
+ }
+
+ /// Standard: add syntactic sugar to apply model as a function call.
+ @differentiable
+ public func callAsFunction(_ imageBatch: Tensor) -> Tensor {
+ let output = classify(imageBatch)
+ return output
+ }
+}
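+
+// Usage sketch (the patch/label tensors and the weight-file path here are
+// illustrative, not part of the API):
+//
+//   var classifier = NNClassifier(
+//     patches: patches,        // [N, 40, 70, 1]
+//     labels: labels,          // [N], 0 = background, 1 = foreground
+//     given: (hiddenDimension: 512, latentDimension: 512,
+//             weightFile: "./classifier_weights.npy", learningRate: 1e-4),
+//     train_mode: "from_scratch")
+//   let logits = classifier.classify(patches)  // [N, 2]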
+
+
+
+/// The loss function for the `NNClassifier`.
+public struct NNClassifierLoss {
+
+  /// Returns the loss of `model` on `imageBatch`.
+  /// - Parameter printLoss: Whether to print the loss and its components.
+ @differentiable
+ public func callAsFunction(
+ _ model: NNClassifier, _ imageBatch: BeeBatch, printLoss: Bool = false
+ ) -> Tensor {
+ let output = model(imageBatch.patch)
+ let totalLoss = softmaxCrossEntropy(logits: output, labels: Tensor(imageBatch.label))
+ return totalLoss
+ }
+
+}
+
+extension NNClassifier: Classifier {}
+
+
+
+public struct PretrainedNNClassifier: Classifier {
+  public var inner: NNClassifier
+
+  /// Constructor that trains the inner network on the given patches and labels.
+  public init(patches: Tensor, labels: Tensor, given: HyperParameters, train_mode: String) {
+    inner = NNClassifier(
+      patches: patches, labels: labels,
+      given: (hiddenDimension: given.hiddenDimension,
+              latentDimension: given.latentDimension,
+              weightFile: given.weightFile,
+              learningRate: given.learningRate),
+      train_mode: train_mode
+    )
+  }
+
+  /// Saves the model weights to `path` as a numpy array file.
+ public func save(to path: String) {
+ let np = Python.import("numpy")
+ np.save(path, np.array(inner.numpyWeights, dtype: Python.object))
+ }
+
+ @differentiable
+ public func classify(_ imageBatch: Tensor) -> Tensor {
+ inner.classify(imageBatch)
+ }
+
+  /// Hyperparameters forwarded to `NNClassifier`'s training initializer.
+ public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String, learningRate: Float)
+}
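+
+// Usage sketch (paths and hyperparameters are illustrative):
+//
+//   let model = PretrainedNNClassifier(
+//     patches: patches, labels: labels,
+//     given: (hiddenDimension: 512, latentDimension: 512,
+//             weightFile: "./pretrained.npy", learningRate: 1e-4),
+//     train_mode: "pretrained")
+//   model.save(to: "./classifier_weight_512_512.npy")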
+
+// /// [1] https://openreview.net/forum?id=S1g7tpEYDS
+// public struct SmallerNNClassifier: Layer{
+// @noDerivative public let imageHeight: Int
+// @noDerivative public let imageWidth: Int
+// @noDerivative public let imageChannels: Int
+// @noDerivative public let latentDimension: Int
+// public var encoder_conv1: Conv2D
+// var encoder_pool1: MaxPool2D
+// public var encoder1: Dense
+// public var encoder2: Dense
+
+// public init(
+// imageHeight: Int, imageWidth: Int, imageChannels: Int, latentDimension: Int
+// ) {
+// self.imageHeight = imageHeight
+// self.imageWidth = imageWidth
+// self.imageChannels = imageChannels
+// self.latentDimension = latentDimension
+
+// encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu)
+
+// encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same)
+
+// encoder1 = Dense(
+// inputSize: imageHeight * imageWidth * imageChannels / 4,
+// outputSize: latentDimension,
+// activation: relu)
+
+// encoder2 = Dense(
+// inputSize: latentDimension,
+// outputSize: 2)
+
+// }
+
+// /// Initialize given an image batch
+// public init(patches patches: Tensor, labels labels: Tensor, given latentDimension: Int? = nil) {
+// print("init from image batch")
+// let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1)
+// let d = latentDimension ?? 10
+// var model = SmallerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, latentDimension: d)
+
+// let optimizer = Adam(for: model)
+// optimizer.learningRate = 1e-3
+
+// let lossFunc = NNClassifierLoss()
+// Context.local.learningPhase = .training
+// let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)})
+// let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array
+// var trainLossResults: [Double] = []
+// let epochCount = 600
+// for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() {
+// var epochLoss: Double = 0
+// var batchCount: Int = 0
+// for batchSamples in epoch {
+// let batch = batchSamples.collated
+// let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) }
+// optimizer.update(&model, along: grad)
+// epochLoss += loss.scalarized()
+// batchCount += 1
+// }
+// epochLoss /= Double(batchCount)
+// trainLossResults.append(epochLoss)
+// // if epochIndex % 50 == 0 {
+// print("Epoch \(epochIndex): Loss: \(epochLoss)")
+// // }
+// }
+
+// self = model
+// }
+
+// /// Differentiable encoder
+// @differentiable(wrt: imageBatch)
+// public func classify(_ imageBatch: Tensor) -> Tensor {
+// let batchSize = imageBatch.shape[0]
+// let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels]
+// precondition(
+// imageBatch.shape == expectedShape,
+// "input shape is \(imageBatch.shape), but expected \(expectedShape)")
+// return imageBatch
+// .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4])
+// .sequenced(through: encoder1, encoder2)
+// }
+
+// /// Standard: add syntactic sugar to apply model as a function call.
+// @differentiable
+// public func callAsFunction(_ imageBatch: Tensor) -> Tensor {
+// let output = classify(imageBatch)
+// return output
+// }
+// }
+
+// public struct LargerNNClassifier: Layer{
+// @noDerivative public let imageHeight: Int
+// @noDerivative public let imageWidth: Int
+// @noDerivative public let imageChannels: Int
+// @noDerivative public let hiddenDimension: Int
+// @noDerivative public let latentDimension: Int
+// public var encoder_conv1: Conv2D
+// var encoder_pool1: MaxPool2D
+// public var encoder1: Dense
+// public var encoder2: Dense
+// public var encoder3: Dense
+// public var encoder4: Dense
+// public init(
+// imageHeight: Int, imageWidth: Int, imageChannels: Int,
+// hiddenDimension: Int, latentDimension: Int
+// ) {
+// self.imageHeight = imageHeight
+// self.imageWidth = imageWidth
+// self.imageChannels = imageChannels
+// self.hiddenDimension = hiddenDimension
+// self.latentDimension = latentDimension
+
+// encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu)
+
+// encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same)
+
+// encoder1 = Dense(
+// inputSize: imageHeight * imageWidth * imageChannels / 4,
+// outputSize: hiddenDimension,
+// activation: relu)
+
+// encoder2 = Dense(
+// inputSize: hiddenDimension,
+// outputSize: hiddenDimension,
+// activation: relu)
+
+// encoder3 = Dense(
+// inputSize: hiddenDimension,
+// outputSize: latentDimension,
+// activation: relu)
+
+// encoder4 = Dense(
+// inputSize: latentDimension,
+// outputSize: 2)
+
+// }
+
+// /// Initialize given an image batch
+// public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int)
+// // public init(from imageBatch: Tensor, given parameters: HyperParameters? = nil) {
+// public init(patches patches: Tensor, labels labels: Tensor, given parameters: HyperParameters? = nil) {
+// print("init from image batch")
+// let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1)
+// let (h,d) = parameters ?? (100,10)
+// var model = LargerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_,
+// hiddenDimension: h, latentDimension: d)
+// let optimizer = Adam(for: model)
+// optimizer.learningRate = 1e-3
+// let lossFunc = NNClassifierLoss()
+// Context.local.learningPhase = .training
+// let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)})
+// let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array
+// //
+// var trainLossResults: [Double] = []
+// let epochCount = 600
+// for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() {
+// var epochLoss: Double = 0
+// var batchCount: Int = 0
+// for batchSamples in epoch {
+// let batch = batchSamples.collated
+// let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) }
+// optimizer.update(&model, along: grad)
+// epochLoss += loss.scalarized()
+// batchCount += 1
+// }
+// epochLoss /= Double(batchCount)
+// trainLossResults.append(epochLoss)
+// if epochIndex % 5 == 0 {
+// print("\nEpoch \(epochIndex):", terminator:"")
+// }
+// print(" \(epochLoss),", terminator: "")
+// }
+
+// // if NSFileManager.fileExistsAtPath(path) {
+// // print("File exists")
+// // } else {
+// // print("File does not exist")
+// // }
+// // np.save("epochloss\()", Tensor(trainLossResults).makeNumpyArray())
+
+// self = model
+// }
+
+// /// Differentiable encoder
+// @differentiable(wrt: imageBatch)
+// public func classify(_ imageBatch: Tensor