@@ -20,6 +20,12 @@ export encoding, encodings_list, Encoding, @enc_str
2020
2121abstract type StringEncodingError end
2222
# Contiguous 1-d byte arrays compatible with a C `unsigned char *` API:
# a plain `Vector{UInt8}`, a contiguous (unit-range or full-slice) view of one,
# or the code units of a `String` / `SubString{String}`.
const ByteVector = Union{Vector{UInt8},
                         Base.FastContiguousSubArray{UInt8,1,<:Array{UInt8,1}},
                         Base.CodeUnits{UInt8,String},
                         Base.CodeUnits{UInt8,SubString{String}}}

# `String` and substrings of it.
const ByteString = Union{String,SubString{String}}
28+
2329# Specified encodings or the combination are not supported by iconv
2430struct InvalidEncodingError <: StringEncodingError
2531 args:: Tuple{String, String}
@@ -31,7 +37,7 @@ message(::Type{InvalidEncodingError}) = "Conversion from <<1>> to <<2>> not supp
struct InvalidSequenceError <: StringEncodingError
    args::Tuple{String}
end

# Build the error from the offending bytes, stored as their hexadecimal
# representation (presumably substituted for `<<1>>` in `message` -- confirm
# against the code that renders `StringEncodingError`s).
# Any byte vector is accepted, not only `Vector{UInt8}`.
InvalidSequenceError(seq::AbstractVector{UInt8}) = InvalidSequenceError((bytes2hex(seq),))

message(::Type{InvalidSequenceError}) =
    "Byte sequence 0x<<1>> is invalid in source encoding or cannot be represented in target encoding"
3642
3743struct IConvError <: StringEncodingError
@@ -123,7 +129,7 @@ function finalize(s::Union{StringEncoder, StringDecoder})
123129 nothing
124130end
125131
126- function iconv! (cd:: Ptr{Nothing} , inbuf:: Vector{UInt8} , outbuf:: Vector{UInt8} ,
132+ function iconv! (cd:: Ptr{Nothing} , inbuf:: ByteVector , outbuf:: ByteVector ,
127133 inbufptr:: Ref{Ptr{UInt8}} , outbufptr:: Ref{Ptr{UInt8}} ,
128134 inbytesleft:: Ref{Csize_t} , outbytesleft:: Ref{Csize_t} )
129135 inbufptr[] = pointer (inbuf)
@@ -499,14 +505,20 @@ end
499505# # Functions to encode/decode strings
500506
501507"""
502- decode([T,] a::Vector {UInt8}, enc)
508+ decode([T,] a::AbstractVector {UInt8}, enc)
503509
504510Convert an array of bytes `a` representing text in encoding `enc` to a string of type `T`.
505511By default, a `String` is returned.
506512
513+ To `decode` an `s::String` of data in non-UTF-8 encoding, use
514+ `decode(codeunits(s), enc)` to act on the underlying byte array.
515+
507516`enc` can be specified either as a string or as an `Encoding` object.
517+ The input data `a` can be a `Vector{UInt8}` of bytes, a contiguous
518+ subarray thereof, or the `codeunits` of a `String` (or substring
519+ thereof).
508520"""
509- function decode (:: Type{T} , a:: Vector{UInt8} , enc:: Encoding ) where {T<: AbstractString }
521+ function decode (:: Type{T} , a:: ByteVector , enc:: Encoding ) where {T<: AbstractString }
510522 b = IOBuffer (a)
511523 try
512524 T (read (StringDecoder (b, enc, encoding (T))))
@@ -515,19 +527,19 @@ function decode(::Type{T}, a::Vector{UInt8}, enc::Encoding) where {T<:AbstractSt
515527 end
516528end
517529
# Accept the encoding as a name: wrap it in an `Encoding` and forward to the
# `decode(::Type{T}, a, ::Encoding)` method.
decode(::Type{T}, a::ByteVector, enc::AbstractString) where {T<:AbstractString} =
    decode(T, a, Encoding(enc))
520532
# With no result type given, decode to `String`.
decode(a::ByteVector, enc::Union{AbstractString,Encoding}) = decode(String, a, enc)
523534
"""
    encode(s::AbstractString, enc)

Convert string `s` to an array of bytes representing text in encoding `enc`.
`enc` can be specified either as a string or as an `Encoding` object.

A generic `AbstractString` is first converted with `String(s)`; `String` and
`SubString{String}` inputs are handled by the `ByteString` method.
"""
encode(s::AbstractString, enc::Encoding) = encode(String(s), enc)
542+ function encode (s:: ByteString , enc:: Encoding )
531543 b = IOBuffer ()
532544 p = StringEncoder (b, enc, encoding (typeof (s)))
533545 write (p, s)
0 commit comments