Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
name: CI

on:
push:
branches: [ master ]
pull_request:
branches:
- '**'
push:
branches: [master]
workflow_dispatch:

concurrency: # https://stackoverflow.com/questions/66335225#comment133398800_72408109
group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
run_unittest_tests:
Expand Down
12 changes: 12 additions & 0 deletions docs/backends/amazon-S3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -259,13 +259,25 @@ Settings
Setting this overrides the settings for ``addressing_style``, ``signature_version`` and
``proxies``. Include them as arguments to your ``botocore.config.Config`` class if you need them.

``client_ttl`` or ``AWS_S3_CLIENT_TTL``

Default: ``3600``

The number of seconds a boto3 client resource is kept in an S3Storage instance's time-to-live cache before it is recreated.

.. note::

Long-lived boto3 clients have a known `memory leak`_, which is why the client is
periodically recreated to avoid excessive memory consumption.

.. _AWS Signature Version 4: https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html
.. _S3 region list: https://docs.aws.amazon.com/general/latest/gr/s3.html#s3_region
.. _list of canned ACLs: https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl
.. _Boto3 docs for uploading files: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.put_object
.. _Boto3 docs for TransferConfig: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig
.. _ManifestStaticFilesStorage: https://docs.djangoproject.com/en/3.1/ref/contrib/staticfiles/#manifeststaticfilesstorage
.. _Botocore docs: https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html#botocore.config.Config
.. _memory leak: https://github.com/boto/boto3/issues/1670

.. _cloudfront-signed-url-header:

Expand Down
114 changes: 72 additions & 42 deletions storages/backends/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import posixpath
import tempfile
import threading
import time
import warnings
from datetime import datetime
from datetime import timedelta
Expand Down Expand Up @@ -38,8 +39,9 @@
from botocore.config import Config
from botocore.exceptions import ClientError
from botocore.signers import CloudFrontSigner
except ImportError as e:
raise ImproperlyConfigured("Could not load Boto3's S3 bindings. %s" % e)
except (ImportError, ModuleNotFoundError) as e:
msg = "Could not import boto3. Did you run 'pip install django-storages[s3]'?"
raise ImproperlyConfigured(msg) from e


# NOTE: these are defined as functions so both can be tested
Expand Down Expand Up @@ -329,9 +331,15 @@ def __init__(self, **settings):
"AWS_S3_SECRET_ACCESS_KEY/secret_key"
)

self._bucket = None
self._connections = threading.local()
self._unsigned_connections = threading.local()
# These variables are used for a boto3 client time-to-live caching mechanism.
# Resources are not kept for too long because long-lived boto3 clients leak
# memory, ref https://github.com/boto/boto3/issues/1670.
self._connection_lock = threading.Lock()
self._connection_expiry = None
self._connection = None
self._unsigned_connection_lock = threading.Lock()
self._unsigned_connection_expiry = None
self._unsigned_connection = None

if self.config is not None:
warnings.warn(
Expand All @@ -347,6 +355,9 @@ def __init__(self, **settings):
s3={"addressing_style": self.addressing_style},
signature_version=self.signature_version,
proxies=self.proxies,
max_pool_connections=64, # shared between threads
tcp_keepalive=True,
retries={"max_attempts": 6, "mode": "adaptive"},
)

if self.use_threads is False:
Expand Down Expand Up @@ -441,58 +452,79 @@ def get_default_settings(self):
"use_threads": setting("AWS_S3_USE_THREADS", True),
"transfer_config": setting("AWS_S3_TRANSFER_CONFIG", None),
"client_config": setting("AWS_S3_CLIENT_CONFIG", None),
"client_ttl": setting("AWS_S3_CLIENT_TTL", 3600),
}

def __getstate__(self):
state = self.__dict__.copy()
state.pop("_connections", None)
state.pop("_unsigned_connections", None)
state.pop("_bucket", None)
state.pop("_connection_lock", None)
state.pop("_connection_expiry", None)
state.pop("_connection", None)
state.pop("_unsigned_connection_lock", None)
state.pop("_unsigned_connection_expiry", None)
state.pop("_unsigned_connection", None)
return state

def __setstate__(self, state):
state["_connections"] = threading.local()
state["_unsigned_connections"] = threading.local()
state["_bucket"] = None
state["_connection_lock"] = threading.Lock()
state["_connection_expiry"] = None
state["_connection"] = None
state["_unsigned_connection_lock"] = threading.Lock()
state["_unsigned_connection_expiry"] = None
state["_unsigned_connection"] = None
self.__dict__ = state

@property
def connection(self):
connection = getattr(self._connections, "connection", None)
if connection is None:
session = self._create_session()
self._connections.connection = session.resource(
"s3",
region_name=self.region_name,
use_ssl=self.use_ssl,
endpoint_url=self.endpoint_url,
config=self.client_config,
verify=self.verify,
)
return self._connections.connection
"""
Get the (cached) thread-safe boto3 s3 resource.
"""
with self._connection_lock:
if (
self._connection is None # fresh instance
or time.monotonic() > self._connection_expiry # TTL expired
):
self._connection_expiry = time.monotonic() + self.client_ttl
self._connection = self._create_connection()
return self._connection

@property
def unsigned_connection(self):
unsigned_connection = getattr(self._unsigned_connections, "connection", None)
if unsigned_connection is None:
session = self._create_session()
config = self.client_config.merge(
Config(signature_version=botocore.UNSIGNED)
)
self._unsigned_connections.connection = session.resource(
"s3",
region_name=self.region_name,
use_ssl=self.use_ssl,
endpoint_url=self.endpoint_url,
config=config,
verify=self.verify,
)
return self._unsigned_connections.connection
"""
Get the (cached) thread-safe boto3 s3 resource (unsigned).
"""
with self._unsigned_connection_lock:
if (
self._unsigned_connection is None # fresh instance
or time.monotonic() > self._unsigned_connection_expiry # TTL expired
):
self._unsigned_connection_expiry = time.monotonic() + self.client_ttl
self._unsigned_connection = self._create_connection(unsigned=True)
return self._unsigned_connection

def _create_connection(self, *, unsigned=False):
"""
Create a new session and thread-safe boto3 s3 resource.
"""
# `unsigned` is keyword-only. When it is true, the instance's client config
# is merged with a Config whose signature_version is botocore.UNSIGNED;
# otherwise the instance's client config is used as-is.
config = self.client_config
if unsigned:
config = config.merge(Config(signature_version=botocore.UNSIGNED))
# The session comes from _create_session() on every call to this method.
session = self._create_session()
# thread-safe boto3 client (wrapped by a boto3 resource) ref:
# https://github.com/boto/boto3/blob/1.38.41/docs/source/guide/clients.rst?plain=1#L111
# Returns the "s3" resource built from this instance's region, SSL,
# endpoint and verify settings together with the config chosen above.
return session.resource(
"s3",
region_name=self.region_name,
use_ssl=self.use_ssl,
endpoint_url=self.endpoint_url,
config=config,
verify=self.verify,
)

def _create_session(self):
"""
If a user specifies a profile name and this class obtains access keys
from another source such as environment variables,we want the profile
from another source such as environment variables, we want the profile
name to take precedence.
"""
if self.session_profile:
Expand All @@ -511,9 +543,7 @@ def bucket(self):
Get the current bucket. If there is no current bucket object
create it.
"""
if self._bucket is None:
self._bucket = self.connection.Bucket(self.bucket_name)
return self._bucket
return self.connection.Bucket(self.bucket_name)

def _normalize_name(self, name):
"""
Expand Down
Loading