From a3826f3daa27afc7615f0bf36316ce35d164eb8d Mon Sep 17 00:00:00 2001 From: Rauli Ikonen Date: Thu, 25 Jun 2020 12:05:00 +0300 Subject: [PATCH 1/2] Fix streaming upload when blob size is unknown The source stream may return less data than was requested. When the size of the entire blob was not known, this was not taken into account when reading more data, sometimes resulting in all data being read from the stream and sent in one chunk, which could end up being larger than the maximum supported size. --- azure-storage-blob/azure/storage/blob/_upload_chunking.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/azure-storage-blob/azure/storage/blob/_upload_chunking.py b/azure-storage-blob/azure/storage/blob/_upload_chunking.py index 756d6108..39c17f3d 100644 --- a/azure-storage-blob/azure/storage/blob/_upload_chunking.py +++ b/azure-storage-blob/azure/storage/blob/_upload_chunking.py @@ -182,6 +182,8 @@ def get_chunk_streams(self): while True: if self.blob_size: read_size = min(self.chunk_size - len(data), self.blob_size - (index + len(data))) + elif data: + read_size = self.chunk_size - len(data) temp = self.stream.read(read_size) temp = _get_data_bytes_only('temp', temp) data += temp From 8ee7dd5227a1b4771020342e38e02528eb33ce04 Mon Sep 17 00:00:00 2001 From: Rauli Ikonen Date: Thu, 25 Jun 2020 12:09:47 +0300 Subject: [PATCH 2/2] Do not call tell() to fix streamed upload _BlobChunkUploader called tell() on the source stream even though it did not use the result for anything. If the stream was non-seekable this just made the operation fail completely unnecessarily. 
--- azure-storage-blob/azure/storage/blob/_upload_chunking.py | 1 - 1 file changed, 1 deletion(-) diff --git a/azure-storage-blob/azure/storage/blob/_upload_chunking.py b/azure-storage-blob/azure/storage/blob/_upload_chunking.py index 39c17f3d..ab3b7dd5 100644 --- a/azure-storage-blob/azure/storage/blob/_upload_chunking.py +++ b/azure-storage-blob/azure/storage/blob/_upload_chunking.py @@ -159,7 +159,6 @@ def __init__(self, blob_service, container_name, blob_name, blob_size, self.chunk_size = chunk_size self.stream = stream self.parallel = parallel - self.stream_start = stream.tell() if parallel else None self.stream_lock = Lock() if parallel else None self.progress_callback = progress_callback self.progress_total = 0