From c4ba14d86fd89ea32318ec3ab3cc1811b11c15c4 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Sat, 4 Oct 2025 13:36:29 +0200 Subject: [PATCH 01/13] lets callers inject a prebuilt Tokenizer --- README.md | 37 +++++++++++- Sources/Models/LanguageModel.swift | 96 ++++++++++++++++++++++++++---- 2 files changed, 120 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index d6f1a18..2084efc 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,41 @@ example converting and running Mistral 7B using CoreML [here](https://github.com The [modernization of Core ML](https://github.com/huggingface/swift-transformers/pull/257) and corresponding examples were primarily contributed by @joshnewnham, @1duo, @alejandro-isaza, @aseemw. Thank you 🙏 +### Offline CoreML tokenizers + +When you bundle a compiled CoreML model and tokenizer files with your app, you can skip any network requests by injecting +the tokenizer (or a local configuration) when constructing `LanguageModel`: + +```swift +let compiledURL: URL = ... // path to .mlmodelc +let tokenizerFolder: URL = ... // folder containing tokenizer_config.json and tokenizer.json + +let model = try LanguageModel.loadCompiled( + url: compiledURL, + tokenizerFolder: tokenizerFolder +) + +// Or construct the tokenizer yourself (inside an async context) +let tokenizer = try await AutoTokenizer.from(modelFolder: tokenizerFolder) +let modelWithTokenizer = try LanguageModel.loadCompiled( + url: compiledURL, + tokenizer: tokenizer +) +``` + +Make sure the tokenizer assets come from the same Hugging Face repo as the original checkpoint. For the +Mistral example in `Examples/Mistral7B/`, you can fetch the tokenizer like this: + +```bash +huggingface-cli download \ + mistralai/Mistral-7B-Instruct-v0.3 \ + tokenizer.json tokenizer_config.json \ + --local-dir Examples/Mistral7B/local-tokenizer +``` + +If the repo is gated, authenticate with `huggingface-cli login` first. Both initializers reuse the tokenizer +you pass in and never reach out to the Hugging Face Hub. + ## Usage via SwiftPM To use `swift-transformers` with SwiftPM, you can add this to your `Package.swift`: @@ -139,5 +174,3 @@ To format your code, run `swift format -i --recursive .`. ## License [Apache 2](LICENSE). - - diff --git a/Sources/Models/LanguageModel.swift b/Sources/Models/LanguageModel.swift index bb5d94d..c3cf7ff 100644 --- a/Sources/Models/LanguageModel.swift +++ b/Sources/Models/LanguageModel.swift @@ -33,12 +33,30 @@ public class LanguageModel { /// Creates a new language model instance from a CoreML model. /// - /// - Parameter model: The CoreML model to wrap + /// - Parameters: + /// - model: The CoreML model to wrap + /// - configuration: Optional Hub configuration already resolved on disk + /// - tokenizer: Optional preconstructed tokenizer to reuse /// - Important: Triggers a fatal error if the model doesn't have the expected input shape information - public required init(model: MLModel) { + public required init( + model: MLModel, + configuration: LanguageModelConfigurationFromHub? = nil, + tokenizer: Tokenizer? 
= nil + ) { self.model = model + _tokenizer = tokenizer (minContextLength, maxContextLength) = Self.contextRange(from: model) - configuration = LanguageModelConfigurationFromHub(modelName: modelName) + if let configuration { + self.configuration = configuration + } else if tokenizer == nil { + self.configuration = LanguageModelConfigurationFromHub(modelName: modelName) + } else { + self.configuration = nil + } + } + + public convenience required init(model: MLModel) { + self.init(model: model, configuration: nil, tokenizer: nil) } public func resetState() async {} @@ -142,17 +160,60 @@ public extension LanguageModel { /// - Parameters: /// - url: The URL of the compiled CoreML model file (.mlmodelc) /// - computeUnits: The compute units to use for model inference + /// - configuration: Optional Hub configuration describing tokenizer/model metadata + /// - tokenizer: Optional tokenizer instance to reuse instead of loading from disk /// - Returns: A configured `LanguageModel` instance /// - Throws: An error if the model cannot be loaded from the specified URL - static func loadCompiled(url: URL, computeUnits: MLComputeUnits = .cpuAndGPU) throws -> LanguageModel { + static func loadCompiled( + url: URL, + computeUnits: MLComputeUnits = .cpuAndGPU, + configuration: LanguageModelConfigurationFromHub? = nil, + tokenizer: Tokenizer? = nil + ) throws -> LanguageModel { let config = MLModelConfiguration() config.computeUnits = computeUnits let model = try MLModel(contentsOf: url, configuration: config) return switch kvCacheAvailability(for: model) { - case .statefulKVCache: LanguageModelWithStatefulKVCache(model: model) - default: LanguageModel(model: model) + case .statefulKVCache: + LanguageModelWithStatefulKVCache( + model: model, + configuration: configuration, + tokenizer: tokenizer + ) + default: + LanguageModel( + model: model, + configuration: configuration, + tokenizer: tokenizer + ) } } + + static func loadCompiled( + url: URL, + tokenizerFolder: URL, + computeUnits: MLComputeUnits = .cpuAndGPU + ) throws -> LanguageModel { + let configuration = LanguageModelConfigurationFromHub(modelFolder: tokenizerFolder) + return try loadCompiled( + url: url, + computeUnits: computeUnits, + configuration: configuration + ) + } + + static func loadCompiled( + url: URL, + tokenizer: Tokenizer, + computeUnits: MLComputeUnits = .cpuAndGPU + ) throws -> LanguageModel { + try loadCompiled( + url: url, + computeUnits: computeUnits, + configuration: nil, + tokenizer: tokenizer + ) + } } @available(macOS 15.0, iOS 18.0, *) @@ -304,7 +365,8 @@ public extension LanguageModel { /// - Throws: An error if the configuration cannot be loaded var modelConfig: Config? { get async throws { - try await configuration!.modelConfig + guard let configuration else { return nil } + return try await configuration.modelConfig } } @@ -314,7 +376,8 @@ public extension LanguageModel { /// - Throws: An error if the configuration cannot be loaded var tokenizerConfig: Config? 
{ get async throws { - try await configuration!.tokenizerConfig + guard let configuration else { return nil } + return try await configuration.tokenizerConfig } } @@ -324,7 +387,10 @@ public extension LanguageModel { /// - Throws: An error if the tokenizer data cannot be loaded var tokenizerData: Config { get async throws { - try await configuration!.tokenizerData + guard let configuration else { + throw TokenizerError.missingConfig + } + return try await configuration.tokenizerData } } @@ -434,8 +500,12 @@ public class LanguageModelWithStatefulKVCache: LanguageModel { var state: MLState? - public required init(model: MLModel) { - super.init(model: model) + public required init( + model: MLModel, + configuration: LanguageModelConfigurationFromHub? = nil, + tokenizer: Tokenizer? = nil + ) { + super.init(model: model, configuration: configuration, tokenizer: tokenizer) // To support pre-filling and extend, the input must support // flexible shapes. guard maxContextLength - minContextLength > 1 else { @@ -506,11 +576,15 @@ public class LanguageModelWithStatefulKVCache: LanguageModel { public enum TokenizerError: LocalizedError { /// The tokenizer configuration file could not be found. case tokenizerConfigNotFound + /// The language model configuration required to load tokenizer data is missing. + case missingConfig public var errorDescription: String? { switch self { case .tokenizerConfigNotFound: String(localized: "Tokenizer configuration could not be found. The model may be missing required tokenizer files.", comment: "Error when tokenizer configuration is missing") + case .missingConfig: + String(localized: "Language model configuration was not set, tokenizer assets could not be loaded.", comment: "Error when configuration needed for tokenizer data is missing") } } } From 0d0b00a2a09ed2d281d2a50b939f4fa1c40b682c Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Sat, 4 Oct 2025 13:43:48 +0200 Subject: [PATCH 02/13] add a tokenizerFolder argument --- .../Sources/transformers-cli/Transformers.swift | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/Examples/transformers-cli/Sources/transformers-cli/Transformers.swift b/Examples/transformers-cli/Sources/transformers-cli/Transformers.swift index 77732dc..bb8c390 100644 --- a/Examples/transformers-cli/Sources/transformers-cli/Transformers.swift +++ b/Examples/transformers-cli/Sources/transformers-cli/Transformers.swift @@ -49,6 +49,9 @@ struct TransformersCLI: AsyncParsableCommand { @Option(help: "Repetition penalty to discourage repeating tokens (typical: 1.0-2.0, 1.0 = no penalty)") var repetitionPenalty: Float? + @Option(help: "Path to a local folder containing tokenizer_config.json and tokenizer.json") + var tokenizerFolder: String? 
+ func generate( model: LanguageModel, config: GenerationConfig, @@ -104,7 +107,17 @@ struct TransformersCLI: AsyncParsableCommand { let url = URL(filePath: modelPath) let compiledURL = try compile(at: url) print("Loading model \(compiledURL)") - let model = try LanguageModel.loadCompiled(url: compiledURL, computeUnits: computeUnits.asMLComputeUnits) + let model: LanguageModel + if let tokenizerFolder { + let tokenizerURL = URL(filePath: tokenizerFolder, directoryHint: .isDirectory) + model = try LanguageModel.loadCompiled( + url: compiledURL, + tokenizerFolder: tokenizerURL, + computeUnits: computeUnits.asMLComputeUnits + ) + } else { + model = try LanguageModel.loadCompiled(url: compiledURL, computeUnits: computeUnits.asMLComputeUnits) + } var config = model.defaultGenerationConfig config.doSample = doSample From 83c160ed75f9b280d01738d0571455b910056756 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 08:35:22 +0100 Subject: [PATCH 03/13] Update Examples/transformers-cli/Sources/transformers-cli/Transformers.swift Co-authored-by: Pedro Cuenca --- .../Sources/transformers-cli/Transformers.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Examples/transformers-cli/Sources/transformers-cli/Transformers.swift b/Examples/transformers-cli/Sources/transformers-cli/Transformers.swift index bb8c390..63c4945 100644 --- a/Examples/transformers-cli/Sources/transformers-cli/Transformers.swift +++ b/Examples/transformers-cli/Sources/transformers-cli/Transformers.swift @@ -50,7 +50,7 @@ struct TransformersCLI: AsyncParsableCommand { var repetitionPenalty: Float? @Option(help: "Path to a local folder containing tokenizer_config.json and tokenizer.json") - var tokenizerFolder: String? + var tokenizerPath: String? func generate( model: LanguageModel, From 8688b61601bf4d90c192d8c3040de461ad12cddd Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 08:35:28 +0100 Subject: [PATCH 04/13] Update README.md Co-authored-by: Pedro Cuenca --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b8e2921..4ec7c6b 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ let modelWithTokenizer = try LanguageModel.loadCompiled( ) ``` -Make sure the tokenizer assets come from the same Hugging Face repo as the original checkpoint. For the +Make sure the tokenizer assets come from the same Hugging Face repo as the original checkpoint or are compatible with the model you use. 
For the Mistral example in `Examples/Mistral7B/`, you can fetch the tokenizer like this: ```bash From e20da07bfe9480f061410745088ab3e5b8d3e634 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 08:35:53 +0100 Subject: [PATCH 05/13] Update Sources/Models/LanguageModel.swift Co-authored-by: Pedro Cuenca --- Sources/Models/LanguageModel.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Models/LanguageModel.swift b/Sources/Models/LanguageModel.swift index 769f85b..162791b 100644 --- a/Sources/Models/LanguageModel.swift +++ b/Sources/Models/LanguageModel.swift @@ -191,8 +191,8 @@ public extension LanguageModel { static func loadCompiled( url: URL, - tokenizerFolder: URL, - computeUnits: MLComputeUnits = .cpuAndGPU + computeUnits: MLComputeUnits = .cpuAndGPU, + tokenizer tokenizerFolder: URL, ) throws -> LanguageModel { let configuration = LanguageModelConfigurationFromHub(modelFolder: tokenizerFolder) return try loadCompiled( From 528cf30b32071937be0f672290f05a0743da840d Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 08:36:01 +0100 Subject: [PATCH 06/13] Update Sources/Models/LanguageModel.swift Co-authored-by: Pedro Cuenca --- Sources/Models/LanguageModel.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Models/LanguageModel.swift b/Sources/Models/LanguageModel.swift index 162791b..f2bc4a6 100644 --- a/Sources/Models/LanguageModel.swift +++ b/Sources/Models/LanguageModel.swift @@ -204,8 +204,8 @@ public extension LanguageModel { static func loadCompiled( url: URL, + computeUnits: MLComputeUnits = .cpuAndGPU, tokenizer: Tokenizer, - computeUnits: MLComputeUnits = .cpuAndGPU ) throws -> LanguageModel { try loadCompiled( url: url, From 3d8b7a5ea73cd69607b0ec712b1aa648b0e78433 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 08:36:10 +0100 Subject: [PATCH 07/13] Update README.md Co-authored-by: Pedro Cuenca --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ec7c6b..0de7a5e 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ the tokenizer (or a local configuration) when constructing `LanguageModel`: ```swift let compiledURL: URL = ... // path to .mlmodelc -let tokenizerFolder: URL = ... // folder containing tokenizer_config.json and tokenizer.json +let tokenizerURL: URL = ... // folder containing tokenizer_config.json and tokenizer.json let model = try LanguageModel.loadCompiled( url: compiledURL, From f781c3b812a15b733694fdce5e377f40f107c4d0 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 09:11:35 +0100 Subject: [PATCH 08/13] removed configuration --- README.md | 13 +++------- Sources/Models/LanguageModel.swift | 41 +++--------------------------- 2 files changed, 7 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 0de7a5e..f7f106d 100644 --- a/README.md +++ b/README.md @@ -91,20 +91,15 @@ The [modernization of Core ML](https://github.com/huggingface/swift-transformers ### Offline CoreML tokenizers When you bundle a compiled CoreML model and tokenizer files with your app, you can skip any network requests by injecting -the tokenizer (or a local configuration) when constructing `LanguageModel`: +the tokenizer when constructing `LanguageModel`: ```swift let compiledURL: URL = ... // path to .mlmodelc -let tokenizerURL: URL = ... // folder containing tokenizer_config.json and tokenizer.json +let tokenizerFolder: URL = ... 
// folder containing tokenizer_config.json and tokenizer.json -let model = try LanguageModel.loadCompiled( - url: compiledURL, - tokenizerFolder: tokenizerFolder -) - -// Or construct the tokenizer yourself (inside an async context) +// Construct the tokenizer from local files (inside an async context) let tokenizer = try await AutoTokenizer.from(modelFolder: tokenizerFolder) -let modelWithTokenizer = try LanguageModel.loadCompiled( +let model = try LanguageModel.loadCompiled( url: compiledURL, tokenizer: tokenizer ) diff --git a/Sources/Models/LanguageModel.swift b/Sources/Models/LanguageModel.swift index f2bc4a6..9a0c95e 100644 --- a/Sources/Models/LanguageModel.swift +++ b/Sources/Models/LanguageModel.swift @@ -35,20 +35,16 @@ public class LanguageModel { /// /// - Parameters: /// - model: The CoreML model to wrap - /// - configuration: Optional Hub configuration already resolved on disk /// - tokenizer: Optional preconstructed tokenizer to reuse /// - Important: Triggers a fatal error if the model doesn't have the expected input shape information public required init( model: MLModel, - configuration: LanguageModelConfigurationFromHub? = nil, tokenizer: Tokenizer? = nil ) { self.model = model _tokenizer = tokenizer (minContextLength, maxContextLength) = Self.contextRange(from: model) - if let configuration { - self.configuration = configuration - } else if tokenizer == nil { + if tokenizer == nil { self.configuration = LanguageModelConfigurationFromHub(modelName: modelName) } else { self.configuration = nil @@ -56,7 +52,7 @@ public class LanguageModel { } public convenience required init(model: MLModel) { - self.init(model: model, configuration: nil, tokenizer: nil) + self.init(model: model, tokenizer: nil) } public func resetState() async {} @@ -160,14 +156,12 @@ public extension LanguageModel { /// - Parameters: /// - url: The URL of the compiled CoreML model file (.mlmodelc) /// - computeUnits: The compute units to use for model inference - /// - configuration: Optional Hub configuration describing tokenizer/model metadata /// - tokenizer: Optional tokenizer instance to reuse instead of loading from disk /// - Returns: A configured `LanguageModel` instance /// - Throws: An error if the model cannot be loaded from the specified URL static func loadCompiled( url: URL, computeUnits: MLComputeUnits = .cpuAndGPU, - configuration: LanguageModelConfigurationFromHub? = nil, tokenizer: Tokenizer? 
= nil ) throws -> LanguageModel { let config = MLModelConfiguration() @@ -177,43 +171,15 @@ public extension LanguageModel { case .statefulKVCache: LanguageModelWithStatefulKVCache( model: model, - configuration: configuration, tokenizer: tokenizer ) default: LanguageModel( model: model, - configuration: configuration, tokenizer: tokenizer ) } } - - static func loadCompiled( - url: URL, - computeUnits: MLComputeUnits = .cpuAndGPU, - tokenizer tokenizerFolder: URL, - ) throws -> LanguageModel { - let configuration = LanguageModelConfigurationFromHub(modelFolder: tokenizerFolder) - return try loadCompiled( - url: url, - computeUnits: computeUnits, - configuration: configuration - ) - } - - static func loadCompiled( - url: URL, - computeUnits: MLComputeUnits = .cpuAndGPU, - tokenizer: Tokenizer, - ) throws -> LanguageModel { - try loadCompiled( - url: url, - computeUnits: computeUnits, - configuration: nil, - tokenizer: tokenizer - ) - } } @available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *) @@ -527,10 +493,9 @@ public class LanguageModelWithStatefulKVCache: LanguageModel { public required init( model: MLModel, - configuration: LanguageModelConfigurationFromHub? = nil, tokenizer: Tokenizer? = nil ) { - super.init(model: model, configuration: configuration, tokenizer: tokenizer) + super.init(model: model, tokenizer: tokenizer) // To support pre-filling and extend, the input must support // flexible shapes. guard maxContextLength - minContextLength > 1 else { From 94e93127b1debb206eafa7ef299b1b5030d12c6f Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 09:16:58 +0100 Subject: [PATCH 09/13] add tokenizerFromLocalFolder test --- Tests/TokenizersTests/TokenizerTests.swift | 35 ++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Tests/TokenizersTests/TokenizerTests.swift b/Tests/TokenizersTests/TokenizerTests.swift index b731194..da48b39 100644 --- a/Tests/TokenizersTests/TokenizerTests.swift +++ b/Tests/TokenizersTests/TokenizerTests.swift @@ -177,6 +177,41 @@ struct TokenizerTests { #expect(tokenizer.encode(text: "<|im_start|>user<|im_sep|>Who are you?<|im_end|><|im_start|>assistant<|im_sep|>") == [100264, 882, 100266, 15546, 527, 499, 30, 100265, 100264, 78191, 100266]) } + @Test + func tokenizerFromLocalFolder() async throws { + let bundle = Bundle.module + guard + let tokenizerConfigURL = bundle.url( + forResource: "tokenizer_config", + withExtension: "json", + subdirectory: "Offline" + ), + bundle.url( + forResource: "tokenizer", + withExtension: "json", + subdirectory: "Offline" + ) != nil + else { + Issue.record("Missing offline tokenizer fixtures") + return + } + + let configuration = LanguageModelConfigurationFromHub(modelFolder: tokenizerConfigURL.deletingLastPathComponent()) + + let tokenizerConfigOpt = try await configuration.tokenizerConfig + #expect(tokenizerConfigOpt != nil) + let tokenizerConfig = tokenizerConfigOpt! 
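+        // tokenizerData is read from the same local folder fixture, so this check never touches the network.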
+ let tokenizerData = try await configuration.tokenizerData + + let tokenizer = try AutoTokenizer.from( + tokenizerConfig: tokenizerConfig, + tokenizerData: tokenizerData + ) + + let encoded = tokenizer.encode(text: "offline path") + #expect(!encoded.isEmpty) + } + /// https://github.com/huggingface/swift-transformers/issues/96 @Test func legacyLlamaBehaviour() async throws { From d3f71c66f5983167a639cd83ec3256bee92802aa Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 09:36:12 +0100 Subject: [PATCH 10/13] add fixtures --- .../Resources/Offline/tokenizer.json | 51 +++++++++++++++++++ .../Resources/Offline/tokenizer_config.json | 9 ++++ 2 files changed, 60 insertions(+) create mode 100644 Tests/TokenizersTests/Resources/Offline/tokenizer.json create mode 100644 Tests/TokenizersTests/Resources/Offline/tokenizer_config.json diff --git a/Tests/TokenizersTests/Resources/Offline/tokenizer.json b/Tests/TokenizersTests/Resources/Offline/tokenizer.json new file mode 100644 index 0000000..65b3e6d --- /dev/null +++ b/Tests/TokenizersTests/Resources/Offline/tokenizer.json @@ -0,0 +1,51 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "" + }, + { + "id": 1, + "content": "" + }, + { + "id": 2, + "content": "" + }, + { + "id": 3, + "content": "" + } + ], + "model": { + "type": "BPE", + "vocab": { + "": 0, + "": 1, + "": 2, + "": 3, + "offline": 4, + "path": 5, + "_": 6 + }, + "merges": [ + "off line", + "li ne", + "pa th", + "_ of", + "_ pa" + ], + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "unk_token": "" + }, + "normalizer": { + "type": "Lowercase" + }, + "pre_tokenizer": { + "type": "Whitespace" + } +} \ No newline at end of file diff --git a/Tests/TokenizersTests/Resources/Offline/tokenizer_config.json b/Tests/TokenizersTests/Resources/Offline/tokenizer_config.json new file mode 100644 index 0000000..a72a238 --- /dev/null +++ b/Tests/TokenizersTests/Resources/Offline/tokenizer_config.json @@ -0,0 +1,9 @@ +{ + "tokenizer_class": "GPT2Tokenizer", + "bos_token": "", + "eos_token": "", + "unk_token": "", + "pad_token": "", + "model_max_length": 128, + "do_lower_case": false +} \ No newline at end of file From 5e867af642278913152eaed7099e0efeb98cc376 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 10:10:48 +0100 Subject: [PATCH 11/13] simplified test --- .../{Offline/tokenizer.json => offline_tokenizer.json} | 0 ...nizer_config.json => offline_tokenizer_config.json} | 0 Tests/TokenizersTests/TokenizerTests.swift | 10 ++++------ 3 files changed, 4 insertions(+), 6 deletions(-) rename Tests/TokenizersTests/Resources/{Offline/tokenizer.json => offline_tokenizer.json} (100%) rename Tests/TokenizersTests/Resources/{Offline/tokenizer_config.json => offline_tokenizer_config.json} (100%) diff --git a/Tests/TokenizersTests/Resources/Offline/tokenizer.json b/Tests/TokenizersTests/Resources/offline_tokenizer.json similarity index 100% rename from Tests/TokenizersTests/Resources/Offline/tokenizer.json rename to Tests/TokenizersTests/Resources/offline_tokenizer.json diff --git a/Tests/TokenizersTests/Resources/Offline/tokenizer_config.json b/Tests/TokenizersTests/Resources/offline_tokenizer_config.json similarity index 100% rename from Tests/TokenizersTests/Resources/Offline/tokenizer_config.json rename to Tests/TokenizersTests/Resources/offline_tokenizer_config.json diff --git a/Tests/TokenizersTests/TokenizerTests.swift b/Tests/TokenizersTests/TokenizerTests.swift index da48b39..f0a963a 100644 
--- a/Tests/TokenizersTests/TokenizerTests.swift +++ b/Tests/TokenizersTests/TokenizerTests.swift @@ -182,14 +182,12 @@ struct TokenizerTests { let bundle = Bundle.module guard let tokenizerConfigURL = bundle.url( - forResource: "tokenizer_config", - withExtension: "json", - subdirectory: "Offline" + forResource: "offline_tokenizer_config", + withExtension: "json" ), bundle.url( - forResource: "tokenizer", - withExtension: "json", - subdirectory: "Offline" + forResource: "offline_tokenizer", + withExtension: "json" ) != nil else { Issue.record("Missing offline tokenizer fixtures") From d444e42eb278bd6e81554dab13542310a6b12954 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 11:35:52 +0100 Subject: [PATCH 12/13] fix names --- .../Resources/{offline_tokenizer.json => tokenizer.json} | 0 .../{offline_tokenizer_config.json => tokenizer_config.json} | 0 Tests/TokenizersTests/TokenizerTests.swift | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename Tests/TokenizersTests/Resources/{offline_tokenizer.json => tokenizer.json} (100%) rename Tests/TokenizersTests/Resources/{offline_tokenizer_config.json => tokenizer_config.json} (100%) diff --git a/Tests/TokenizersTests/Resources/offline_tokenizer.json b/Tests/TokenizersTests/Resources/tokenizer.json similarity index 100% rename from Tests/TokenizersTests/Resources/offline_tokenizer.json rename to Tests/TokenizersTests/Resources/tokenizer.json diff --git a/Tests/TokenizersTests/Resources/offline_tokenizer_config.json b/Tests/TokenizersTests/Resources/tokenizer_config.json similarity index 100% rename from Tests/TokenizersTests/Resources/offline_tokenizer_config.json rename to Tests/TokenizersTests/Resources/tokenizer_config.json diff --git a/Tests/TokenizersTests/TokenizerTests.swift b/Tests/TokenizersTests/TokenizerTests.swift index f0a963a..b81189a 100644 --- a/Tests/TokenizersTests/TokenizerTests.swift +++ b/Tests/TokenizersTests/TokenizerTests.swift @@ -182,11 +182,11 @@ struct TokenizerTests { let bundle = Bundle.module guard let tokenizerConfigURL = bundle.url( - forResource: "offline_tokenizer_config", + forResource: "tokenizer_config", withExtension: "json" ), bundle.url( - forResource: "offline_tokenizer", + forResource: "tokenizer", withExtension: "json" ) != nil else { From db497623eecdebc930e72eb23556cd8bc8901f53 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Mon, 27 Oct 2025 15:04:04 +0100 Subject: [PATCH 13/13] Update Sources/Models/LanguageModel.swift Co-authored-by: Pedro Cuenca --- Sources/Models/LanguageModel.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Models/LanguageModel.swift b/Sources/Models/LanguageModel.swift index 9a0c95e..057d750 100644 --- a/Sources/Models/LanguageModel.swift +++ b/Sources/Models/LanguageModel.swift @@ -156,7 +156,7 @@ public extension LanguageModel { /// - Parameters: /// - url: The URL of the compiled CoreML model file (.mlmodelc) /// - computeUnits: The compute units to use for model inference - /// - tokenizer: Optional tokenizer instance to reuse instead of loading from disk + /// - tokenizer: Optional tokenizer instance to reuse /// - Returns: A configured `LanguageModel` instance /// - Throws: An error if the model cannot be loaded from the specified URL static func loadCompiled(