Skip to content

Commit 31f75a3

Browse files
committed
Delay SubString->String conversion
This has a surprisingly large performance benefit.
1 parent 0d219c0 commit 31f75a3

File tree

1 file changed

+13
-12
lines changed

1 file changed

+13
-12
lines changed

src/mmcif.jl

+13-12
Original file line numberDiff line numberDiff line change
@@ -75,16 +75,17 @@ Call `MMCIFDict` with a filepath or stream to read the dictionary from that
7575
source.
7676
The keyword argument `gzip` (default `false`) determines if the input is gzipped.
7777
"""
78-
struct MMCIFDict <: AbstractDict{String, Vector{String}}
79-
dict::Dict{String, Vector{String}}
78+
struct MMCIFDict{K<:AbstractString} <: AbstractDict{K, Vector{K}}
79+
dict::Dict{K, Vector{K}}
8080
end
8181

82-
MMCIFDict() = MMCIFDict(Dict())
82+
MMCIFDict{K}() where K<:AbstractString = MMCIFDict{K}(Dict{K,Vector{K}}())
83+
MMCIFDict() = MMCIFDict{String}()
8384

8485
Base.getindex(mmcif_dict::MMCIFDict, field::AbstractString) = mmcif_dict.dict[field]
8586

8687
function Base.setindex!(mmcif_dict::MMCIFDict,
87-
val::AbstractVector{<:String},
88+
val::AbstractVector{<:AbstractString},
8889
field::AbstractString)
8990
mmcif_dict.dict[field] = val
9091
return mmcif_dict
@@ -147,7 +148,7 @@ splitline(s::AbstractString) = splitline!(String[], s) # mostly for testing
147148

148149
# Get tokens from a mmCIF file
149150
function tokenizecif(f::IO)
150-
tokens = String[]
151+
tokens = SubString{String}[]
151152
for line in eachline(f)
152153
if startswith(line, "#")
153154
continue
@@ -172,7 +173,7 @@ end
172173
# This will fail if there is only a single atom record in the file
173174
# and it is not in the loop format
174175
function tokenizecifstructure(f::IO)
175-
tokens = String[]
176+
tokens = SubString{String}[]
176177
reading = false
177178
in_keys = true
178179
category_groups = ["_atom_site.", "_struct_conf."]
@@ -218,14 +219,14 @@ end
218219

219220
# Read a mmCIF file into a MMCIFDict
220221
function MMCIFDict(f::IO; gzip::Bool=false)
221-
mmcif_dict = MMCIFDict()
222222
if gzip
223223
gz = GzipDecompressorStream(f)
224224
tokens = tokenizecif(gz)
225225
close(gz)
226226
else
227227
tokens = tokenizecif(f)
228228
end
229+
mmcif_dict = MMCIFDict{eltype(tokens)}()
229230
# Data label token is read first
230231
if length(tokens) == 0
231232
return mmcif_dict
@@ -236,16 +237,16 @@ function MMCIFDict(f::IO; gzip::Bool=false)
236237
end
237238

238239
# Add tokens to a mmCIF dictionary
239-
function populatedict!(mmcif_dict::MMCIFDict, tokens::AbstractVector{<:AbstractString})
240+
function populatedict!(mmcif_dict::MMCIFDict{K}, tokens::AbstractVector{<:AbstractString}) where K<:AbstractString
240241
key = ""
241-
keys = String[]
242+
keys = K[]
242243
loop_flag = false
243244
i = 0 # Value counter
244245
n = 0 # Key counter
245246
for token in tokens
246247
if token == "loop_" || token == "LOOP_"
247248
loop_flag = true
248-
keys = String[]
249+
keys = K[]
249250
i = 0
250251
n = 0
251252
continue
@@ -258,7 +259,7 @@ function populatedict!(mmcif_dict::MMCIFDict, tokens::AbstractVector{<:AbstractS
258259
if i > 0
259260
loop_flag = false
260261
else
261-
mmcif_dict[token] = String[]
262+
mmcif_dict[token] = K[]
262263
push!(keys, token)
263264
n += 1
264265
continue
@@ -290,14 +291,14 @@ function Base.read(input::IO,
290291
run_dssp::Bool=false,
291292
run_stride::Bool=false,
292293
gzip::Bool=false)
293-
mmcif_dict = MMCIFDict()
294294
if gzip
295295
gz = GzipDecompressorStream(input)
296296
tokens = tokenizecifstructure(gz)
297297
close(gz)
298298
else
299299
tokens = tokenizecifstructure(input)
300300
end
301+
mmcif_dict = MMCIFDict{eltype(tokens)}()
301302
populatedict!(mmcif_dict, tokens)
302303
return MolecularStructure(
303304
mmcif_dict;

0 commit comments

Comments
 (0)