From 3c66fe78c9dd03741ea67670ba8e37f1c9e61ec2 Mon Sep 17 00:00:00 2001
From: Tomas Celaya <tjcelaya@gmail.com>
Date: Thu, 30 Nov 2017 18:10:36 -0800
Subject: [PATCH 1/7] Forked from autopilotpattern/postgres and partially
 working

---
 Dockerfile                          |   70 ++
 README.md                           |   13 +
 etc/cassandra.yaml.ctmpl            | 1232 +++++++++++++++++++++++++++
 etc/containerpilot.json5            |   63 ++
 etc/jmxremote.access                |    1 +
 etc/jmxremote.password              |    1 +
 etc/onChange.sh                     |   10 +
 etc/preStart.sh                     |    1 +
 examples/compose/docker-compose.yml |   34 +
 makefile                            |   24 +
 10 files changed, 1449 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 etc/cassandra.yaml.ctmpl
 create mode 100644 etc/containerpilot.json5
 create mode 100644 etc/jmxremote.access
 create mode 100644 etc/jmxremote.password
 create mode 100644 etc/onChange.sh
 create mode 100644 etc/preStart.sh
 create mode 100644 examples/compose/docker-compose.yml
 create mode 100644 makefile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..7738e69
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,70 @@
+FROM cassandra:3.11.0
+
+# Install wget and unzip (used below to fetch and unpack release archives) and dnsutils (provides dig)
+RUN set -ex \
+    && apt-get update \
+    && apt-get install -y wget unzip dnsutils \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Consul: download the pinned release, verify its SHA-256, unpack into /usr/local/bin
+# Releases at https://releases.hashicorp.com/consul
+RUN set -ex \
+    && export CONSUL_VERSION=1.0.1 \
+    && export CONSUL_CHECKSUM=eac5755a1d19e4b93f6ce30caaf7b3bd8add4557b143890b1c07f5614a667a68 \
+    && wget --quiet -O /tmp/consul.zip "https://releases.hashicorp.com/consul/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_linux_amd64.zip" \
+    && echo "${CONSUL_CHECKSUM}  /tmp/consul.zip" | sha256sum -c \
+    && unzip /tmp/consul.zip -d /usr/local/bin \
+    && rm /tmp/consul.zip \
+    && mkdir -p /etc/consul \
+    && mkdir -p /var/lib/consul \
+    && mkdir -p /config
+
+# Install consul-template: fetch the pinned release, check its SHA-256, then unpack it into /usr/local/bin
+# Releases at https://releases.hashicorp.com/consul-template/
+RUN set -ex \
+    && CT_VERSION=0.19.0 \
+    && CT_SHA256=31dda6ebc7bd7712598c6ac0337ce8fd8c533229887bd58e825757af879c5f9f \
+    && CT_ZIP=/tmp/consul-template.zip \
+    && wget --quiet -O "${CT_ZIP}" "https://releases.hashicorp.com/consul-template/${CT_VERSION}/consul-template_${CT_VERSION}_linux_amd64.zip" \
+    && echo "${CT_SHA256}  ${CT_ZIP}" | sha256sum -c \
+    && unzip "${CT_ZIP}" -d /usr/local/bin && rm "${CT_ZIP}"
+
+# Add Containerpilot (archive checked against its SHA-1 sum) and set its configuration
+ENV CONTAINERPILOT /etc/containerpilot.json5
+ENV CONTAINERPILOT_VERSION 3.3.3
+
+RUN set -ex \
+    && export CONTAINERPILOT_CHECKSUM=8d680939a8a5c8b27e764d55a78f5e3ae7b42ef4 \
+    && export archive=containerpilot-${CONTAINERPILOT_VERSION}.tar.gz \
+    && wget --quiet -O "/tmp/${archive}" "https://github.com/joyent/containerpilot/releases/download/${CONTAINERPILOT_VERSION}/${archive}" \
+    && echo "${CONTAINERPILOT_CHECKSUM}  /tmp/${archive}" | sha1sum -c \
+    && tar zxf "/tmp/${archive}" -C /usr/local/bin \
+    && rm "/tmp/${archive}"
+
+COPY etc/containerpilot.json5 /etc/containerpilot.json5
+
+COPY etc/preStart.sh /etc/preStart.sh
+COPY etc/onChange.sh /etc/onChange.sh
+COPY etc/cassandra.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
+
+### Cassandra-specific setup follows
+
+ENV LOCAL_JMX=no
+
+# Append JMX auth options to cassandra-env.sh (one line per printf argument, no reliance on echo expanding "\n"); only the access line actually seems to do anything
+RUN printf '%s\n' '' 'if [ "$LOCAL_JMX" = "no" ]; then' \
+           '    JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true"' \
+           '    JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password"' \
+           '    JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access"' \
+         'fi' >> /etc/cassandra/cassandra-env.sh
+
+# JMX credential files; restricted below to owner-read-only for the cassandra user
+COPY etc/jmxremote.password etc/jmxremote.access /etc/cassandra/
+
+RUN chmod +x /etc/preStart.sh /etc/onChange.sh \
+    && chown cassandra:cassandra /etc/cassandra/jmxremote.access /etc/cassandra/jmxremote.password \
+    && chmod 0400 /etc/cassandra/jmxremote.password /etc/cassandra/jmxremote.access
+
+# 7000/7001 are storage_port/ssl_storage_port in cassandra.yaml.ctmpl; 7199, 9042, 9160 are presumably the JMX, CQL native and Thrift defaults
+EXPOSE 7000 7001 7199 9042 9160
+ENTRYPOINT ["/usr/local/bin/containerpilot"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4b9c17d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# Autopilot Pattern Cassandra
+
+
+```
+cd examples/compose
+docker-compose up -d --scale cassandra=3
+# for a cqlsh shell:
+docker-compose exec cassandra cqlsh cassandra
+
+cqlsh> CREATE KEYSPACE demo WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2 };
+USE demo;
+
+```
\ No newline at end of file
diff --git a/etc/cassandra.yaml.ctmpl b/etc/cassandra.yaml.ctmpl
new file mode 100644
index 0000000..8308595
--- /dev/null
+++ b/etc/cassandra.yaml.ctmpl
@@ -0,0 +1,1232 @@
+# Cassandra storage config YAML
+
+# NOTE:
+#   See http://wiki.apache.org/cassandra/StorageConfiguration for
+#   full explanations of configuration directives
+# /NOTE
+
+# The name of the cluster. This is mainly used to prevent machines in
+# one logical cluster from joining another.
+cluster_name: 'Test Cluster'
+
+# This defines the number of tokens randomly assigned to this node on the ring
+# The more tokens, relative to other nodes, the larger the proportion of data
+# that this node will store. You probably want all nodes to have the same number
+# of tokens assuming they have equal hardware capability.
+#
+# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
+# and will use the initial_token as described below.
+#
+# Specifying initial_token will override this setting on the node's initial start,
+# on subsequent starts, this setting will apply even if initial token is set.
+#
+# If you already have a cluster with 1 token per node, and wish to migrate to 
+# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
+num_tokens: 256
+
+# Triggers automatic allocation of num_tokens tokens for this node. The allocation
+# algorithm attempts to choose tokens in a way that optimizes replicated load over
+# the nodes in the datacenter for the replication strategy used by the specified
+# keyspace.
+#
+# The load assigned to each node will be close to proportional to its number of
+# vnodes.
+#
+# Only supported with the Murmur3Partitioner.
+# allocate_tokens_for_keyspace: KEYSPACE
+
+# initial_token allows you to specify tokens manually.  While you can use it with
+# vnodes (num_tokens > 1, above) -- in which case you should provide a 
+# comma-separated list -- it's primarily used when adding nodes to legacy clusters 
+# that do not have vnodes enabled.
+# initial_token:
+
+# See http://wiki.apache.org/cassandra/HintedHandoff
+# May either be "true" or "false" to enable globally
+hinted_handoff_enabled: true
+
+# When hinted_handoff_enabled is true, a black list of data centers that will not
+# perform hinted handoff
+# hinted_handoff_disabled_datacenters:
+#    - DC1
+#    - DC2
+
+# this defines the maximum amount of time a dead host will have hints
+# generated.  After it has been dead this long, new hints for it will not be
+# created until it has been seen alive and gone down again.
+max_hint_window_in_ms: 10800000 # 3 hours
+
+# Maximum throttle in KBs per second, per delivery thread.  This will be
+# reduced proportionally to the number of nodes in the cluster.  (If there
+# are two nodes in the cluster, each delivery thread will use the maximum
+# rate; if there are three, each will throttle to half of the maximum,
+# since we expect two nodes to be delivering hints simultaneously.)
+hinted_handoff_throttle_in_kb: 1024
+
+# Number of threads with which to deliver hints;
+# Consider increasing this number when you have multi-dc deployments, since
+# cross-dc handoff tends to be slower
+max_hints_delivery_threads: 2
+
+# Directory where Cassandra should store hints.
+# If not set, the default directory is $CASSANDRA_HOME/data/hints.
+# hints_directory: /var/lib/cassandra/hints
+
+# How often hints should be flushed from the internal buffers to disk.
+# Will *not* trigger fsync.
+hints_flush_period_in_ms: 10000
+
+# Maximum size for a single hints file, in megabytes.
+max_hints_file_size_in_mb: 128
+
+# Compression to apply to the hint files. If omitted, hints files
+# will be written uncompressed. LZ4, Snappy, and Deflate compressors
+# are supported.
+#hints_compression:
+#   - class_name: LZ4Compressor
+#     parameters:
+#         -
+
+# Maximum throttle in KBs per second, total. This will be
+# reduced proportionally to the number of nodes in the cluster.
+batchlog_replay_throttle_in_kb: 1024
+
+# Authentication backend, implementing IAuthenticator; used to identify users
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
+# PasswordAuthenticator}.
+#
+# - AllowAllAuthenticator performs no checks - set it to disable authentication.
+# - PasswordAuthenticator relies on username/password pairs to authenticate
+#   users. It keeps usernames and hashed passwords in system_auth.roles table.
+#   Please increase system_auth keyspace replication factor if you use this authenticator.
+#   If using PasswordAuthenticator, CassandraRoleManager must also be used (see below)
+authenticator: AllowAllAuthenticator
+
+# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer,
+# CassandraAuthorizer}.
+#
+# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
+# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please
+#   increase system_auth keyspace replication factor if you use this authorizer.
+authorizer: AllowAllAuthorizer
+
+# Part of the Authentication & Authorization backend, implementing IRoleManager; used
+# to maintain grants and memberships between roles.
+# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager,
+# which stores role information in the system_auth keyspace. Most functions of the
+# IRoleManager require an authenticated login, so unless the configured IAuthenticator
+# actually implements authentication, most of this functionality will be unavailable.
+#
+# - CassandraRoleManager stores role data in the system_auth keyspace. Please
+#   increase system_auth keyspace replication factor if you use this role manager.
+role_manager: CassandraRoleManager
+
+# Validity period for roles cache (fetching granted roles can be an expensive
+# operation depending on the role manager, CassandraRoleManager is one example)
+# Granted roles are cached for authenticated sessions in AuthenticatedUser and
+# after the period specified here, become eligible for (async) reload.
+# Defaults to 2000, set to 0 to disable caching entirely.
+# Will be disabled automatically for AllowAllAuthenticator.
+roles_validity_in_ms: 2000
+
+# Refresh interval for roles cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If roles_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as roles_validity_in_ms.
+# roles_update_interval_in_ms: 2000
+
+# Validity period for permissions cache (fetching permissions can be an
+# expensive operation depending on the authorizer, CassandraAuthorizer is
+# one example). Defaults to 2000, set to 0 to disable.
+# Will be disabled automatically for AllowAllAuthorizer.
+permissions_validity_in_ms: 2000
+
+# Refresh interval for permissions cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If permissions_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as permissions_validity_in_ms.
+# permissions_update_interval_in_ms: 2000
+
+# Validity period for credentials cache. This cache is tightly coupled to
+# the provided PasswordAuthenticator implementation of IAuthenticator. If
+# another IAuthenticator implementation is configured, this cache will not
+# be automatically used and so the following settings will have no effect.
+# Please note, credentials are cached in their encrypted form, so while
+# activating this cache may reduce the number of queries made to the
+# underlying table, it may not  bring a significant reduction in the
+# latency of individual authentication attempts.
+# Defaults to 2000, set to 0 to disable credentials caching.
+credentials_validity_in_ms: 2000
+
+# Refresh interval for credentials cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If credentials_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as credentials_validity_in_ms.
+# credentials_update_interval_in_ms: 2000
+
+# The partitioner is responsible for distributing groups of rows (by
+# partition key) across nodes in the cluster.  You should leave this
+# alone for new clusters.  The partitioner can NOT be changed without
+# reloading all data, so when upgrading you should set this to the
+# same partitioner you were already using.
+#
+# Besides Murmur3Partitioner, partitioners included for backwards
+# compatibility include RandomPartitioner, ByteOrderedPartitioner, and
+# OrderPreservingPartitioner.
+#
+partitioner: org.apache.cassandra.dht.Murmur3Partitioner
+
+# Directories where Cassandra should store data on disk.  Cassandra
+# will spread data evenly across them, subject to the granularity of
+# the configured compaction strategy.
+# If not set, the default directory is $CASSANDRA_HOME/data/data.
+data_file_directories:
+    - /var/lib/cassandra/data
+
+# commit log.  when running on magnetic HDD, this should be a
+# separate spindle than the data directories.
+# If not set, the default directory is $CASSANDRA_HOME/data/commitlog.
+commitlog_directory: /var/lib/cassandra/commitlog
+
+# Enable / disable CDC functionality on a per-node basis. This modifies the logic used
+# for write path allocation rejection (standard: never reject. cdc: reject Mutation
+# containing a CDC-enabled table if at space limit in cdc_raw_directory).
+cdc_enabled: false
+
+# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the
+# segment contains mutations for a CDC-enabled table. This should be placed on a
+# separate spindle than the data directories. If not set, the default directory is
+# $CASSANDRA_HOME/data/cdc_raw.
+# cdc_raw_directory: /var/lib/cassandra/cdc_raw
+
+# Policy for data disk failures:
+#
+# die
+#   shut down gossip and client transports and kill the JVM for any fs errors or
+#   single-sstable errors, so the node can be replaced.
+#
+# stop_paranoid
+#   shut down gossip and client transports even for single-sstable errors,
+#   kill the JVM for errors during startup.
+#
+# stop
+#   shut down gossip and client transports, leaving the node effectively dead, but
+#   can still be inspected via JMX, kill the JVM for errors during startup.
+#
+# best_effort
+#    stop using the failed disk and respond to requests based on
+#    remaining available sstables.  This means you WILL see obsolete
+#    data at CL.ONE!
+#
+# ignore
+#    ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
+disk_failure_policy: stop
+
+# Policy for commit disk failures:
+#
+# die
+#   shut down gossip and Thrift and kill the JVM, so the node can be replaced.
+#
+# stop
+#   shut down gossip and Thrift, leaving the node effectively dead, but
+#   can still be inspected via JMX.
+#
+# stop_commit
+#   shutdown the commit log, letting writes collect but
+#   continuing to service reads, as in pre-2.0.5 Cassandra
+#
+# ignore
+#   ignore fatal errors and let the batches fail
+commit_failure_policy: stop
+
+# Maximum size of the native protocol prepared statement cache
+#
+# Valid values are either "auto" (omitting the value) or a value greater 0.
+#
+# Note that specifying a too large value will result in long running GCs and possibly
+# out-of-memory errors. Keep the value at a small fraction of the heap.
+#
+# If you constantly see "prepared statements discarded in the last minute because
+# cache limit reached" messages, the first step is to investigate the root cause
+# of these messages and check whether prepared statements are used correctly -
+# i.e. use bind markers for variable parts.
+#
+# Only change the default value if you really have more prepared statements than
+# fit in the cache. In most cases it is not necessary to change this value.
+# Constantly re-preparing statements is a performance penalty.
+#
+# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
+prepared_statements_cache_size_mb:
+
+# Maximum size of the Thrift prepared statement cache
+#
+# If you do not use Thrift at all, it is safe to leave this value at "auto".
+#
+# See description of 'prepared_statements_cache_size_mb' above for more information.
+#
+# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
+thrift_prepared_statements_cache_size_mb:
+
+# Maximum size of the key cache in memory.
+#
+# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
+# minimum, sometimes more. The key cache is fairly tiny for the amount of
+# time it saves, so it's worthwhile to use it at large numbers.
+# The row cache saves even more time, but must contain the entire row,
+# so it is extremely space-intensive. It's best to only use the
+# row cache if you have hot rows or static rows.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
+key_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the key cache. Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 14400 or 4 hours.
+key_cache_save_period: 14400
+
+# Number of keys from the key cache to save
+# Disabled by default, meaning all keys are going to be saved
+# key_cache_keys_to_save: 100
+
+# Row cache implementation class name. Available implementations:
+#
+# org.apache.cassandra.cache.OHCProvider
+#   Fully off-heap row cache implementation (default).
+#
+# org.apache.cassandra.cache.SerializingCacheProvider
+#   This is the row cache implementation available
+#   in previous releases of Cassandra.
+# row_cache_class_name: org.apache.cassandra.cache.OHCProvider
+
+# Maximum size of the row cache in memory.
+# Please note that OHC cache implementation requires some additional off-heap memory to manage
+# the map structures and some in-flight memory during operations before/after cache entries can be
+# accounted against the cache capacity. This overhead is usually small compared to the whole capacity.
+# Do not specify more memory than the system can afford in the worst usual situation and leave some
+# headroom for OS block level cache. Never allow your system to swap.
+#
+# Default value is 0, to disable row caching.
+row_cache_size_in_mb: 0
+
+# Duration in seconds after which Cassandra should save the row cache.
+# Caches are saved to saved_caches_directory as specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 0 to disable saving the row cache.
+row_cache_save_period: 0
+
+# Number of keys from the row cache to save.
+# Specify 0 (which is the default), meaning all keys are going to be saved
+# row_cache_keys_to_save: 100
+
+# Maximum size of the counter cache in memory.
+#
+# Counter cache helps to reduce counter locks' contention for hot counter cells.
+# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before
+# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration
+# of the lock hold, helping with hot counter cell updates, but will not allow skipping
+# the read entirely. Only the local (clock, count) tuple of a counter cell is kept
+# in memory, not the whole counter, so it's relatively cheap.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.
+# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache.
+counter_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the counter cache (keys only). Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Default is 7200 or 2 hours.
+counter_cache_save_period: 7200
+
+# Number of keys from the counter cache to save
+# Disabled by default, meaning all keys are going to be saved
+# counter_cache_keys_to_save: 100
+
+# saved caches
+# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
+saved_caches_directory: /var/lib/cassandra/saved_caches
+
+# commitlog_sync may be either "periodic" or "batch." 
+# 
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been fsynced to disk.  It will wait
+# commitlog_sync_batch_window_in_ms milliseconds between fsyncs.
+# This window should be kept short because the writer threads will
+# be unable to do extra work while waiting.  (You may need to increase
+# concurrent_writes for the same reason.)
+#
+# commitlog_sync: batch
+# commitlog_sync_batch_window_in_ms: 2
+#
+# the other option is "periodic" where writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds. 
+commitlog_sync: periodic
+commitlog_sync_period_in_ms: 10000
+
+# The size of the individual commitlog file segments.  A commitlog
+# segment may be archived, deleted, or recycled once all the data
+# in it (potentially from each columnfamily in the system) has been
+# flushed to sstables.
+#
+# The default size is 32, which is almost always fine, but if you are
+# archiving commitlog segments (see commitlog_archiving.properties),
+# then you probably want a finer granularity of archiving; 8 or 16 MB
+# is reasonable.
+# Max mutation size is also configurable via max_mutation_size_in_kb setting in
+# cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024.
+#
+# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must
+# be set to at least twice the size of max_mutation_size_in_kb / 1024
+#
+commitlog_segment_size_in_mb: 32
+
+# Compression to apply to the commit log. If omitted, the commit log
+# will be written uncompressed.  LZ4, Snappy, and Deflate compressors
+# are supported.
+# commitlog_compression:
+#   - class_name: LZ4Compressor
+#     parameters:
+#         -
+
+# any class that implements the SeedProvider interface and has a
+# constructor that takes a Map<String, String> of parameters will do.
+seed_provider:
+    # Addresses of hosts that are deemed contact points. 
+    # Cassandra nodes use this list of hosts to find each other and learn
+    # the topology of the ring.  You must change this if you are running
+    # multiple nodes!
+    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
+      parameters:
+          # seeds is actually a comma-delimited list of addresses, e.g. "<ip1>,<ip2>,<ip3>".
+          # Rendered from the Consul "cassandra" service; the comma goes between entries only (no trailing comma), and the value is quoted so an empty list is still a string.
+          - seeds: "{{ range $i, $node := service "cassandra" }}{{ if ne $i 0 }},{{ end }}{{ $node.Address }}{{ end }}"
+
+# For workloads with more data than can fit in memory, Cassandra's
+# bottleneck will be reads that need to fetch data from
+# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
+# order to allow the operations to enqueue low enough in the stack
+# that the OS and drives can reorder them. Same applies to
+# "concurrent_counter_writes", since counter writes read the current
+# values before incrementing and writing them back.
+#
+# On the other hand, since writes are almost never IO bound, the ideal
+# number of "concurrent_writes" is dependent on the number of cores in
+# your system; (8 * number_of_cores) is a good rule of thumb.
+concurrent_reads: 32
+concurrent_writes: 32
+concurrent_counter_writes: 32
+
+# Materialized view writes involve a read, so this should be limited by the
+# lesser of concurrent reads or concurrent writes.
+concurrent_materialized_view_writes: 32
+
+# Maximum memory to use for sstable chunk cache and buffer pooling.
+# 32MB of this are reserved for pooling buffers, the rest is used as a
+# cache that holds uncompressed sstable chunks.
+# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap,
+# so is in addition to the memory allocated for heap. The cache also has on-heap
+# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size
+# if the default 64k chunk size is used).
+# Memory is only allocated when needed.
+# file_cache_size_in_mb: 512
+
+# Flag indicating whether to allocate on or off heap when the sstable buffer
+# pool is exhausted, that is when it has exceeded the maximum memory
+# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request.
+
+# buffer_pool_use_heap_if_exhausted: true
+
+# The strategy for optimizing disk read
+# Possible values are:
+# ssd (for solid state disks, the default)
+# spinning (for spinning disks)
+# disk_optimization_strategy: ssd
+
+# Total permitted memory to use for memtables. Cassandra will stop
+# accepting writes when the limit is exceeded until a flush completes,
+# and will trigger a flush based on memtable_cleanup_threshold
+# If omitted, Cassandra will set both to 1/4 the size of the heap.
+# memtable_heap_space_in_mb: 2048
+# memtable_offheap_space_in_mb: 2048
+
+# memtable_cleanup_threshold is deprecated. The default calculation
+# is the only reasonable choice. See the comments on  memtable_flush_writers
+# for more information.
+#
+# Ratio of occupied non-flushing memtable size to total permitted size
+# that will trigger a flush of the largest memtable. Larger mct will
+# mean larger flushes and hence less compaction, but also less concurrent
+# flush activity which can make it difficult to keep your disks fed
+# under heavy write load.
+#
+# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
+# memtable_cleanup_threshold: 0.11
+
+# Specify the way Cassandra allocates and manages memtable memory.
+# Options are:
+#
+# heap_buffers
+#   on heap nio buffers
+#
+# offheap_buffers
+#   off heap (direct) nio buffers
+#
+# offheap_objects
+#    off heap objects
+memtable_allocation_type: heap_buffers
+
+# Total space to use for commit logs on disk.
+#
+# If space gets above this value, Cassandra will flush every dirty CF
+# in the oldest segment and remove it.  So a small total commitlog space
+# will tend to cause more flush activity on less-active columnfamilies.
+#
+# The default value is the smaller of 8192, and 1/4 of the total space
+# of the commitlog volume.
+#
+# commitlog_total_space_in_mb: 8192
+
+# This sets the number of memtable flush writer threads per disk
+# as well as the total number of memtables that can be flushed concurrently.
+# These are generally a combination of compute and IO bound.
+#
+# Memtable flushing is more CPU efficient than memtable ingest and a single thread
+# can keep up with the ingest rate of a whole server on a single fast disk
+# until it temporarily becomes IO bound under contention typically with compaction.
+# At that point you need multiple flush threads. At some point in the future
+# it may become CPU bound all the time.
+#
+# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation
+# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing
+# to free memory.
+#
+# memtable_flush_writers defaults to two for a single data directory.
+# This means that two  memtables can be flushed concurrently to the single data directory.
+# If you have multiple data directories the default is one memtable flushing at a time
+# but the flush will use a thread per data directory so you will get two or more writers.
+#
+# Two is generally enough to flush on a fast disk [array] mounted as a single data directory.
+# Adding more flush writers will result in smaller more frequent flushes that introduce more
+# compaction overhead.
+#
+# There is a direct tradeoff between number of memtables that can be flushed concurrently
+# and flush size and frequency. More is not better; you just need enough flush writers
+# to never stall waiting for flushing to free memory.
+#
+#memtable_flush_writers: 2
+
+# Total space to use for change-data-capture logs on disk.
+#
+# If space gets above this value, Cassandra will throw WriteTimeoutException
+# on Mutations including tables with CDC enabled. A CDCCompactor is responsible
+# for parsing the raw CDC logs and deleting them when parsing is completed.
+#
+# The default value is the min of 4096 mb and 1/8th of the total space
+# of the drive where cdc_raw_directory resides.
+# cdc_total_space_in_mb: 4096
+
+# When we hit our cdc_raw limit and the CDCCompactor is either running behind
+# or experiencing backpressure, we check at the following interval to see if any
+# new space for cdc-tracked tables has been made available. Default to 250ms
+# cdc_free_space_check_interval_ms: 250
+
+# A fixed memory pool size in MB for SSTable index summaries. If left
+# empty, this will default to 5% of the heap size. If the memory usage of
+# all index summaries exceeds this limit, SSTables with low read rates will
+# shrink their index summaries in order to meet this limit.  However, this
+# is a best-effort process. In extreme conditions Cassandra may need to use
+# more than this amount of memory.
+index_summary_capacity_in_mb:
+
+# How frequently index summaries should be resampled.  This is done
+# periodically to redistribute memory from the fixed-size pool to sstables
+# proportional to their recent read rates.  Setting to -1 will disable this
+# process, leaving existing index summaries at their current sampling level.
+index_summary_resize_interval_in_minutes: 60
+
+# Whether to, when doing sequential writing, fsync() at intervals in
+# order to force the operating system to flush the dirty
+# buffers. Enable this to avoid sudden dirty buffer flushing from
+# impacting read latencies. Almost always a good idea on SSDs; not
+# necessarily on platters.
+trickle_fsync: false
+trickle_fsync_interval_in_kb: 10240
+
+# TCP port, for commands and data
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+storage_port: 7000
+
+# SSL port, for encrypted communication.  Unused unless enabled in
+# encryption_options
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+ssl_storage_port: 7001
+
+# Address or interface to bind to and tell other Cassandra nodes to connect to.
+# You _must_ change this if you want multiple nodes to be able to communicate!
+#
+# Set listen_address OR listen_interface, not both.
+#
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing _if_ the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be).
+#
+# Setting listen_address to 0.0.0.0 is always wrong.
+#
+listen_address: 172.20.0.3
+
+# Set listen_address OR listen_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+# listen_interface: eth0
+
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+# listen_interface_prefer_ipv6: false
+
+# Address to broadcast to other Cassandra nodes
+# Leaving this blank will set it to the same value as listen_address
+broadcast_address: 172.20.0.3
+
+# When using multiple physical network interfaces, set this
+# to true to listen on broadcast_address in addition to
+# the listen_address, allowing nodes to communicate in both
+# interfaces.
+# Ignore this property if the network configuration automatically
+# routes  between the public and private networks such as EC2.
+# listen_on_broadcast_address: false
+
+# Internode authentication backend, implementing IInternodeAuthenticator;
+# used to allow/disallow connections from peer nodes.
+# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
+
+# Whether to start the native transport server.
+# Please note that the address on which the native transport is bound is the
+# same as the rpc_address. The port however is different and specified below.
+start_native_transport: true
+# port for the CQL native transport to listen for clients on
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+native_transport_port: 9042
+# Enabling native transport encryption in client_encryption_options allows you to either use
+# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
+# standard native_transport_port.
+# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
+# for native_transport_port. Setting native_transport_port_ssl to a different value
+# from native_transport_port will use encryption for native_transport_port_ssl while
+# keeping native_transport_port unencrypted.
+# native_transport_port_ssl: 9142
+# The maximum threads for handling requests when the native transport is used.
+# This is similar to rpc_max_threads though the default differs slightly (and
+# there is no native_transport_min_threads, idle threads will always be stopped
+# after 30 seconds).
+# native_transport_max_threads: 128
+#
+# The maximum size of allowed frame. Frame (requests) larger than this will
+# be rejected as invalid. The default is 256MB. If you're changing this parameter,
+# you may want to adjust max_value_size_in_mb accordingly.
+# native_transport_max_frame_size_in_mb: 256
+
+# The maximum number of concurrent client connections.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections: -1
+
+# The maximum number of concurrent client connections per source ip.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections_per_ip: -1
+
+# Whether to start the thrift rpc server.
+start_rpc: false
+
+# The address or interface to bind the Thrift RPC service and native transport
+# server to.
+#
+# Set rpc_address OR rpc_interface, not both.
+#
+# Leaving rpc_address blank has the same effect as on listen_address
+# (i.e. it will be based on the configured hostname of the node).
+#
+# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
+# set broadcast_rpc_address to a value other than 0.0.0.0.
+#
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+rpc_address: 0.0.0.0
+
+# Set rpc_address OR rpc_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+# rpc_interface: eth1
+
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+# rpc_interface_prefer_ipv6: false
+
+# port for Thrift to listen for clients on
+rpc_port: 9160
+
+# RPC address to broadcast to drivers and other Cassandra nodes. This cannot
+# be set to 0.0.0.0. If left blank, this will be set to the value of
+# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
+# be set.
+broadcast_rpc_address: 172.20.0.3
+
+# enable or disable keepalive on rpc/native connections
+rpc_keepalive: true
+
+# Cassandra provides two out-of-the-box options for the RPC Server:
+#
+# sync
+#   One thread per thrift connection. For a very large number of clients, memory
+#   will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size
+#   per thread, and that will correspond to your use of virtual memory (but physical memory
+#   may be limited depending on use of stack space).
+#
+# hsha
+#   Stands for "half synchronous, half asynchronous." All thrift clients are handled
+#   asynchronously using a small number of threads that does not vary with the amount
+#   of thrift clients (and thus scales well to many clients). The rpc requests are still
+#   synchronous (one thread per active request). If hsha is selected then it is essential
+#   that rpc_max_threads is changed from the default value of unlimited.
+#
+# The default is sync because on Windows hsha is about 30% slower.  On Linux,
+# sync/hsha performance is about the same, with hsha of course using less memory.
+#
+# Alternatively, you can provide your own RPC server by providing the fully-qualified class name
+# of an o.a.c.t.TServerFactory that can create an instance of it.
+rpc_server_type: sync
+
+# Uncomment rpc_min|max_thread to set request pool size limits.
+#
+# Regardless of your choice of RPC server (see above), the number of maximum requests in the
+# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync
+# RPC server, it also dictates the number of clients that can be connected at all).
+#
+# The default is unlimited and thus provides no protection against clients overwhelming the server. You are
+# encouraged to set a maximum that makes sense for you in production, but do keep in mind that
+# rpc_max_threads represents the maximum number of client requests this server may execute concurrently.
+#
+# rpc_min_threads: 16
+# rpc_max_threads: 2048
+
+# uncomment to set socket buffer sizes on rpc connections
+# rpc_send_buff_size_in_bytes:
+# rpc_recv_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.wmem_max
+# and when not setting it it is defined by net.ipv4.tcp_wmem
+# See also:
+# /proc/sys/net/core/wmem_max
+# /proc/sys/net/core/rmem_max
+# /proc/sys/net/ipv4/tcp_wmem
+# /proc/sys/net/ipv4/tcp_rmem
+# and 'man tcp'
+# internode_send_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.rmem_max
+# and when not setting it it is defined by net.ipv4.tcp_rmem
+# internode_recv_buff_size_in_bytes:
+
+# Frame size for thrift (maximum message length).
+thrift_framed_transport_size_in_mb: 15
+
+# Set to true to have Cassandra create a hard link to each sstable
+# flushed or streamed locally in a backups/ subdirectory of the
+# keyspace data.  Removing these links is the operator's
+# responsibility.
+incremental_backups: false
+
+# Whether or not to take a snapshot before each compaction.  Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you.  Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+
+# Whether or not a snapshot is taken of the data before keyspace truncation
+# or dropping of column families. The STRONGLY advised default of true 
+# should be used to provide data safety. If you set this flag to false, you will
+# lose data on truncation or drop.
+auto_snapshot: true
+
+# Granularity of the collation index of rows within a partition.
+# Increase if your rows are large, or if you have a very large
+# number of rows per partition.  The competing goals are these:
+#
+# - a smaller granularity means more index entries are generated
+#   and looking up rows within the partition by collation column
+#   is faster
+# - but, Cassandra will keep the collation index in memory for hot
+#   rows (as part of the key cache), so a larger granularity means
+#   you can cache more hot rows
+column_index_size_in_kb: 64
+
+# Per sstable indexed key cache entries (the collation index in memory
+# mentioned above) exceeding this size will not be held on heap.
+# This means that only partition information is held on heap and the
+# index entries are read from disk.
+#
+# Note that this size refers to the size of the
+# serialized index information and not the size of the partition.
+column_index_cache_size_in_kb: 2
+
+# Number of simultaneous compactions to allow, NOT including
+# validation "compactions" for anti-entropy repair.  Simultaneous
+# compactions can help preserve read performance in a mixed read/write
+# workload, by mitigating the tendency of small sstables to accumulate
+# during a single long running compaction. The default is usually
+# fine and if you experience problems with compaction running too
+# slowly or too fast, you should look at
+# compaction_throughput_mb_per_sec first.
+#
+# concurrent_compactors defaults to the smaller of (number of disks,
+# number of cores), with a minimum of 2 and a maximum of 8.
+# 
+# If your data directories are backed by SSD, you should increase this
+# to the number of cores.
+#concurrent_compactors: 1
+
+# Throttles compaction to the given total throughput across the entire
+# system. The faster you insert data, the faster you need to compact in
+# order to keep the sstable count down, but in general, setting this to
+# 16 to 32 times the rate you are inserting data is more than sufficient.
+# Setting this to 0 disables throttling. Note that this accounts for all types
+# of compaction, including validation compaction.
+compaction_throughput_mb_per_sec: 16
+
+# When compacting, the replacement sstable(s) can be opened before they
+# are completely written, and used in place of the prior sstables for
+# any range that has been written. This helps to smoothly transfer reads 
+# between the sstables, reducing page cache churn and keeping hot rows hot
+sstable_preemptive_open_interval_in_mb: 50
+
+# Throttles all outbound streaming file transfers on this node to the
+# given total throughput in Mbps. This is necessary because Cassandra does
+# mostly sequential IO when streaming data during bootstrap or repair, which
+# can lead to saturating the network connection and degrading rpc performance.
+# When unset, the default is 200 Mbps or 25 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 200
+
+# Throttles all streaming file transfer between the datacenters,
+# this setting allows users to throttle inter dc stream throughput in addition
+# to throttling all network stream traffic as configured with
+# stream_throughput_outbound_megabits_per_sec
+# When unset, the default is 200 Mbps or 25 MB/s
+# inter_dc_stream_throughput_outbound_megabits_per_sec: 200
+
+# How long the coordinator should wait for read operations to complete
+read_request_timeout_in_ms: 5000
+# How long the coordinator should wait for seq or index scans to complete
+range_request_timeout_in_ms: 10000
+# How long the coordinator should wait for writes to complete
+write_request_timeout_in_ms: 2000
+# How long the coordinator should wait for counter writes to complete
+counter_write_request_timeout_in_ms: 5000
+# How long a coordinator should continue to retry a CAS operation
+# that contends with other proposals for the same row
+cas_contention_timeout_in_ms: 1000
+# How long the coordinator should wait for truncates to complete
+# (This can be much longer, because unless auto_snapshot is disabled
+# we need to flush first so we can snapshot before removing the data.)
+truncate_request_timeout_in_ms: 60000
+# The default timeout for other, miscellaneous operations
+request_timeout_in_ms: 10000
+
+# How long before a node logs slow queries. Select queries that take longer than
+# this timeout to execute, will generate an aggregated log message, so that slow queries
+# can be identified. Set this value to zero to disable slow query logging.
+slow_query_log_timeout_in_ms: 500
+
+# Enable operation timeout information exchange between nodes to accurately
+# measure request timeouts.  If disabled, replicas will assume that requests
+# were forwarded to them instantly by the coordinator, which means that
+# under overload conditions we will waste that much extra time processing 
+# already-timed-out requests.
+#
+# Warning: before enabling this property make sure ntp is installed
+# and the times are synchronized between the nodes.
+cross_node_timeout: false
+
+# Set keep-alive period for streaming
+# This node will send a keep-alive message periodically with this period.
+# If the node does not receive a keep-alive message from the peer for
+# 2 keep-alive cycles the stream session times out and fails
+# Default value is 300s (5 minutes), which means stalled stream
+# times out in 10 minutes by default
+# streaming_keep_alive_period_in_secs: 300
+
+# phi value that must be reached for a host to be marked down.
+# most users should never need to adjust this.
+# phi_convict_threshold: 8
+
+# endpoint_snitch -- Set this to a class that implements
+# IEndpointSnitch.  The snitch has two functions:
+#
+# - it teaches Cassandra enough about your network topology to route
+#   requests efficiently
+# - it allows Cassandra to spread replicas around your cluster to avoid
+#   correlated failures. It does this by grouping machines into
+#   "datacenters" and "racks."  Cassandra will do its best not to have
+#   more than one replica on the same "rack" (which may not actually
+#   be a physical location)
+#
+# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH
+# ONCE DATA IS INSERTED INTO THE CLUSTER.  This would cause data loss.
+# This means that if you start with the default SimpleSnitch, which
+# locates every node on "rack1" in "datacenter1", your only options
+# if you need to add another datacenter are GossipingPropertyFileSnitch
+# (and the older PFS).  From there, if you want to migrate to an
+# incompatible snitch like Ec2Snitch you can do it by adding new nodes
+# under Ec2Snitch (which will locate them in a new "datacenter") and
+# decommissioning the old ones.
+#
+# Out of the box, Cassandra provides:
+#
+# SimpleSnitch:
+#    Treats Strategy order as proximity. This can improve cache
+#    locality when disabling read repair.  Only appropriate for
+#    single-datacenter deployments.
+#
+# GossipingPropertyFileSnitch
+#    This should be your go-to snitch for production use.  The rack
+#    and datacenter for the local node are defined in
+#    cassandra-rackdc.properties and propagated to other nodes via
+#    gossip.  If cassandra-topology.properties exists, it is used as a
+#    fallback, allowing migration from the PropertyFileSnitch.
+#
+# PropertyFileSnitch:
+#    Proximity is determined by rack and data center, which are
+#    explicitly configured in cassandra-topology.properties.
+#
+# Ec2Snitch:
+#    Appropriate for EC2 deployments in a single Region. Loads Region
+#    and Availability Zone information from the EC2 API. The Region is
+#    treated as the datacenter, and the Availability Zone as the rack.
+#    Only private IPs are used, so this will not work across multiple
+#    Regions.
+#
+# Ec2MultiRegionSnitch:
+#    Uses public IPs as broadcast_address to allow cross-region
+#    connectivity.  (Thus, you should set seed addresses to the public
+#    IP as well.) You will need to open the storage_port or
+#    ssl_storage_port on the public IP firewall.  (For intra-Region
+#    traffic, Cassandra will switch to the private IP after
+#    establishing a connection.)
+#
+# RackInferringSnitch:
+#    Proximity is determined by rack and data center, which are
+#    assumed to correspond to the 3rd and 2nd octet of each node's IP
+#    address, respectively.  Unless this happens to match your
+#    deployment conventions, this is best used as an example of
+#    writing a custom Snitch class and is provided in that spirit.
+#
+# You can use a custom Snitch by setting this to the full class name
+# of the snitch, which will be assumed to be on your classpath.
+endpoint_snitch: SimpleSnitch
+
+# controls how often to perform the more expensive part of host score
+# calculation
+dynamic_snitch_update_interval_in_ms: 100 
+# controls how often to reset all host scores, allowing a bad host to
+# possibly recover
+dynamic_snitch_reset_interval_in_ms: 600000
+# if set greater than zero and read_repair_chance is < 1.0, this will allow
+# 'pinning' of replicas to hosts in order to increase cache capacity.
+# The badness threshold will control how much worse the pinned host has to be
+# before the dynamic snitch will prefer other replicas over it.  This is
+# expressed as a double which represents a percentage.  Thus, a value of
+# 0.2 means Cassandra would continue to prefer the static snitch values
+# until the pinned host was 20% worse than the fastest.
+dynamic_snitch_badness_threshold: 0.1
+
+# request_scheduler -- Set this to a class that implements
+# RequestScheduler, which will schedule incoming client requests
+# according to the specific policy. This is useful for multi-tenancy
+# with a single Cassandra cluster.
+# NOTE: This is specifically for requests from the client and does
+# not affect inter node communication.
+# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
+# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
+# client requests to a node with a separate queue for each
+# request_scheduler_id. The scheduler is further customized by
+# request_scheduler_options as described below.
+request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+
+# Scheduler Options vary based on the type of scheduler
+#
+# NoScheduler
+#   Has no options
+#
+# RoundRobin
+#   throttle_limit
+#     The throttle_limit is the number of in-flight
+#     requests per client.  Requests beyond 
+#     that limit are queued up until
+#     running requests can complete.
+#     The value of 80 here is twice the number of
+#     concurrent_reads + concurrent_writes.
+#   default_weight
+#     default_weight is optional and allows for
+#     overriding the default which is 1.
+#   weights
+#     Weights are optional and will default to 1 or the
+#     overridden default_weight. The weight translates into how
+#     many requests are handled during each turn of the
+#     RoundRobin, based on the scheduler id.
+#
+# request_scheduler_options:
+#    throttle_limit: 80
+#    default_weight: 5
+#    weights:
+#      Keyspace1: 1
+#      Keyspace2: 5
+
+# request_scheduler_id -- An identifier based on which to perform
+# the request scheduling. Currently the only valid option is keyspace.
+# request_scheduler_id: keyspace
+
+# Enable or disable inter-node encryption
+# JVM defaults for supported SSL socket protocols and cipher suites can
+# be replaced using custom encryption options. This is not recommended
+# unless you have policies in place that dictate certain settings, or
+# need to disable vulnerable ciphers or protocols in case the JVM cannot
+# be updated.
+# FIPS compliant settings can be configured at JVM level and should not
+# involve changing encryption settings here:
+# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html
+# *NOTE* No custom encryption options are enabled at the moment
+# The available internode options are : all, none, dc, rack
+#
+# If set to dc cassandra will encrypt the traffic between the DCs
+# If set to rack cassandra will encrypt the traffic between the racks
+#
+# The passwords used in these options must match the passwords used when generating
+# the keystore and truststore.  For instructions on generating these files, see:
+# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+#
+server_encryption_options:
+    internode_encryption: none
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    truststore: conf/.truststore
+    truststore_password: cassandra
+    # More advanced defaults below:
+    # protocol: TLS
+    # algorithm: SunX509
+    # store_type: JKS
+    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+    # require_client_auth: false
+    # require_endpoint_verification: false
+
+# enable or disable client/server encryption.
+client_encryption_options:
+    enabled: false
+    # If enabled and optional is set to true encrypted and unencrypted connections are handled.
+    optional: false
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    # require_client_auth: false
+    # Set truststore and truststore_password if require_client_auth is true
+    # truststore: conf/.truststore
+    # truststore_password: cassandra
+    # More advanced defaults below:
+    # protocol: TLS
+    # algorithm: SunX509
+    # store_type: JKS
+    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+
+# internode_compression controls whether traffic between nodes is
+# compressed.
+# Can be:
+#
+# all
+#   all traffic is compressed
+#
+# dc
+#   traffic between different datacenters is compressed
+#
+# none
+#   nothing is compressed.
+internode_compression: dc
+
+# Enable or disable tcp_nodelay for inter-dc communication.
+# Disabling it will result in larger (but fewer) network packets being sent,
+# reducing overhead from the TCP protocol itself, at the cost of increasing
+# latency if you block for cross-datacenter responses.
+inter_dc_tcp_nodelay: false
+
+# TTL for different trace types used during logging of the repair process.
+tracetype_query_ttl: 86400
+tracetype_repair_ttl: 604800
+
+# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
+# This threshold can be adjusted to minimize logging if necessary
+# gc_log_threshold_in_ms: 200
+
+# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at
+# INFO level
+# UDFs (user defined functions) are disabled by default.
+# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code.
+enable_user_defined_functions: false
+
+# Enables scripted UDFs (JavaScript UDFs).
+# Java UDFs are always enabled, if enable_user_defined_functions is true.
+# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider.
+# This option has no effect, if enable_user_defined_functions is false.
+enable_scripted_user_defined_functions: false
+
+# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation.
+# Lowering this value on Windows can provide much tighter latency and better throughput, however
+# some virtualized environments may see a negative performance impact from changing this setting
+# below their system default. The sysinternals 'clockres' tool can confirm your system's default
+# setting.
+windows_timer_interval: 1
+
+
+# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from
+# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by
+# the "key_alias" is the only key that will be used for encrypt operations; previously used keys
+# can still (and should!) be in the keystore and will be used on decrypt operations
+# (to handle the case of key rotation).
+#
+# It is strongly recommended to download and install Java Cryptography Extension (JCE)
+# Unlimited Strength Jurisdiction Policy Files for your version of the JDK.
+# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html)
+#
+# Currently, only the following file types are supported for transparent data encryption, although
+# more are coming in future cassandra releases: commitlog, hints
+transparent_data_encryption_options:
+    enabled: false
+    chunk_length_kb: 64
+    cipher: AES/CBC/PKCS5Padding
+    key_alias: testing:1
+    # CBC IV length for AES needs to be 16 bytes (which is also the default size)
+    # iv_length: 16
+    key_provider: 
+      - class_name: org.apache.cassandra.security.JKSKeyProvider
+        parameters: 
+          - keystore: conf/.keystore
+            keystore_password: cassandra
+            store_type: JCEKS
+            key_password: cassandra
+
+
+#####################
+# SAFETY THRESHOLDS #
+#####################
+
+# When executing a scan, within or across a partition, we need to keep the
+# tombstones seen in memory so we can return them to the coordinator, which
+# will use them to make sure other replicas also know about the deleted rows.
+# With workloads that generate a lot of tombstones, this can cause performance
+# problems and even exhaust the server heap.
+# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+# Adjust the thresholds here if you understand the dangers and want to
+# scan more tombstones anyway.  These thresholds may also be adjusted at runtime
+# using the StorageService mbean.
+tombstone_warn_threshold: 1000
+tombstone_failure_threshold: 100000
+
+# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default.
+# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
+batch_size_warn_threshold_in_kb: 5
+
+# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default.
+batch_size_fail_threshold_in_kb: 50
+
+# Log WARN on any batches not of type LOGGED that span across more partitions than this limit
+unlogged_batch_across_partitions_warn_threshold: 10
+
+# Log a warning when compacting partitions larger than this value
+compaction_large_partition_warning_threshold_mb: 100
+
+# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level
+# Adjust the threshold based on your application throughput requirement
+# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
+gc_warn_threshold_in_ms: 1000
+
+# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption
+# early. Any value size larger than this threshold will result in marking an SSTable
+# as corrupted.
+# max_value_size_in_mb: 256
+
+# Back-pressure settings #
+# If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation
+# sent to replicas, with the aim of reducing pressure on overloaded replicas.
+back_pressure_enabled: false
+# The back-pressure strategy applied.
+# The default implementation, RateBasedBackPressure, takes three arguments:
+# high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests.
+# If below high ratio, outgoing mutations are rate limited according to the incoming rate decreased by the given factor;
+# if above high ratio, the rate limiting is increased by the given factor;
+# such factor is usually best configured between 1 and 10, use larger values for a faster recovery
+# at the expense of potentially more dropped mutations;
+# the rate limiting is applied according to the flow type: if FAST, it's rate limited at the speed of the fastest replica,
+# if SLOW at the speed of the slowest one.
+# New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and
+# provide a public constructor accepting a Map<String, Object>.
+back_pressure_strategy:
+    - class_name: org.apache.cassandra.net.RateBasedBackPressure
+      parameters:
+        - high_ratio: 0.90
+          factor: 5
+          flow: FAST
+
+# Coalescing Strategies #
+# Coalescing multiple messages turns out to significantly boost message processing throughput (think doubling or more).
+# On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in
+# virtualized environments, the point at which an application can be bound by network packet processing can be
+# surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal
+# doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process
+# is sufficient for many applications such that no load starvation is experienced even without coalescing.
+# There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages
+# per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one
+# trip to read from a socket, and all the task submission work can be done at the same time reducing context switching
+# and increasing cache friendliness of network message processing.
+# See CASSANDRA-8692 for details.
+
+# Strategy to use for coalescing messages in OutboundTcpConnection.
+# Can be fixed, movingaverage, timehorizon, disabled (default).
+# You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name.
+# otc_coalescing_strategy: DISABLED
+
+# How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first
+# message is received before it will be sent with any accompanying messages. For moving average this is the
+# maximum amount of time that will be waited as well as the interval at which messages must arrive on average
+# for coalescing to be enabled.
+# otc_coalescing_window_us: 200
+
+# Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128.
+# otc_coalescing_enough_coalesced_messages: 8
+
+# How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection.
+# Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory
+# taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value
+# will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU
+# time and queue contention while iterating the backlog of messages.
+# An interval of 0 disables any wait time, which is the behavior of former Cassandra versions.
+#
+# otc_backlog_expiration_interval_ms: 200
diff --git a/etc/containerpilot.json5 b/etc/containerpilot.json5
new file mode 100644
index 0000000..4148ad1
--- /dev/null
+++ b/etc/containerpilot.json5
@@ -0,0 +1,63 @@
+{
+  consul: '{{ if .CONSUL_AGENT }}localhost{{ else }}{{ .CONSUL | default "consul"}}{{ end }}:8500',
+  logging: {
+    level: "DEBUG",
+    format: "text"
+  },
+  jobs: [
+    {{ if .CONSUL_AGENT }}
+    {
+      name: 'consul-agent',
+      exec: ['/usr/local/bin/consul', 'agent',
+                  '-data-dir=/data',
+                  '-config-dir=/config',
+                  '-log-level=err',
+                  '-rejoin',
+                  '-retry-join', '{{ .CONSUL | default "consul" }}',
+                  '-retry-max', '10',
+                  '-retry-interval', '10s'],
+      health: {
+        exec: 'wget -O /dev/null http://localhost:8500',
+        interval: 10,
+        ttl: 25
+      },
+      restarts: 'unlimited'
+    },
+    {{ end }}
+    {
+      // without a "when" field this will start first
+      name: "preStart",
+      exec: "sh /etc/preStart.sh"
+    },
+    {
+      name: "cassandra",
+      exec: "/docker-entrypoint.sh -f",
+      restarts: "unlimited",
+      port: 9042,
+      health: {
+        exec: "nodetool -u cassandra -pw cassandra status",
+        interval: 5,
+        ttl: 15,
+        timeout: "10s"
+      },
+      when: {
+        source: "preStart",
+        once: "exitSuccess",
+      },
+    },
+    {
+      name: 'onChange-cassandra',
+      exec: 'sh /etc/onChange.sh',
+      when: {
+        source: 'watch.cassandra',
+        each: 'changed'
+      }
+    }
+  ],
+  watches: [
+    {
+      name: "cassandra",
+      interval: 5,
+    }
+  ]
+}
diff --git a/etc/jmxremote.access b/etc/jmxremote.access
new file mode 100644
index 0000000..6c4e870
--- /dev/null
+++ b/etc/jmxremote.access
@@ -0,0 +1 @@
+cassandra readwrite
diff --git a/etc/jmxremote.password b/etc/jmxremote.password
new file mode 100644
index 0000000..3f65fa5
--- /dev/null
+++ b/etc/jmxremote.password
@@ -0,0 +1 @@
+cassandra cassandra
diff --git a/etc/onChange.sh b/etc/onChange.sh
new file mode 100644
index 0000000..30aae36
--- /dev/null
+++ b/etc/onChange.sh
@@ -0,0 +1,10 @@
+#!bin/sh
+consul-template -once -template /etc/cassandra/cassandra.yaml.ctmpl:/etc/cassandra/cassandra.yaml
+
+nodetool -u cassandra -pw cassandra stopdaemon
+
+# TODO: dont stopdaemon unless there are enough other nodes available
+# if [ $((`nodetool -u cassandra -pw cassandra status | grep -v $HOSTNAME | grep UN | wc -l`)) -gt 1 ]; then
+#   nodetool -u cassandra -pw cassandra stopdaemon
+# fi
+
diff --git a/etc/preStart.sh b/etc/preStart.sh
new file mode 100644
index 0000000..f23adee
--- /dev/null
+++ b/etc/preStart.sh
@@ -0,0 +1 @@
+echo preStart-cassandra fired
diff --git a/examples/compose/docker-compose.yml b/examples/compose/docker-compose.yml
new file mode 100644
index 0000000..827325d
--- /dev/null
+++ b/examples/compose/docker-compose.yml
@@ -0,0 +1,34 @@
+version: '2.1'
+# Cassandra demonstration of the Autopilot pattern
+
+services:
+  cassandra:
+    build: ../../
+    image: autopilotpattern/cassandra:latest
+    mem_limit: 1g
+    restart: always
+    dns:
+      - 127.0.0.1
+    environment:
+      - CONSUL=consul
+      - CONSUL_AGENT=1
+    links:
+      - consul:consul
+
+  # Start with a single host which will bootstrap the cluster.
+  # In production we'll want to use an HA cluster.
+  consul:
+    image: autopilotpattern/consul:0.7.2-r0.8
+    restart: always
+    mem_limit: 128m
+    ports:
+      - 8500:8500
+    dns:
+      - 127.0.0.1
+    command: >
+      /usr/local/bin/containerpilot
+      /bin/consul agent -server
+        -config-dir=/etc/consul
+        -log-level=err
+        -bootstrap-expect 1
+        -ui-dir /ui
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..de8475a
--- /dev/null
+++ b/makefile
@@ -0,0 +1,24 @@
+build:
+	docker-compose \
+		-p autopilotpattern \
+		-f examples/compose/docker-compose.yml \
+		build cassandra
+
+up:
+	docker-compose \
+		-p autopilotpattern \
+		-f examples/compose/docker-compose.yml \
+		up
+
+down:
+	docker-compose \
+		-p autopilotpattern \
+		-f examples/compose/docker-compose.yml \
+		down --remove-orphans -v
+
+ps:
+	docker-compose \
+		-p autopilotpattern \
+		-f examples/compose/docker-compose.yml \
+		ps
+

From b3d9a0cae1ab21ba4bc9478037fb393c11e02390 Mon Sep 17 00:00:00 2001
From: Tomas Celaya <tjcelaya@gmail.com>
Date: Mon, 4 Dec 2017 18:34:41 -0800
Subject: [PATCH 2/7] More onChange changes

---
 Dockerfile                                    | 16 +++--
 README.md                                     | 19 ++++++
 etc/containerpilot.json5                      |  6 +-
 etc/onChange.py                               | 58 +++++++++++++++++++
 etc/onChange.sh                               | 10 ----
 etc/preStart.py                               |  2 +
 etc/preStart.sh                               |  1 -
 .../{docker-compose.yml => local-compose.yml} | 15 ++---
 examples/triton/docker-compose.yaml           | 39 +++++++++++++
 examples/triton/setup.sh                      |  1 +
 makefile                                      | 38 +++++++-----
 11 files changed, 163 insertions(+), 42 deletions(-)
 create mode 100644 etc/onChange.py
 delete mode 100644 etc/onChange.sh
 create mode 100644 etc/preStart.py
 delete mode 100644 etc/preStart.sh
 rename examples/compose/{docker-compose.yml => local-compose.yml} (71%)
 create mode 100644 examples/triton/docker-compose.yaml
 create mode 100644 examples/triton/setup.sh

diff --git a/Dockerfile b/Dockerfile
index 7738e69..b0079f2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,15 @@
 FROM cassandra:3.11.0
 
-# install wget unzip and dig
+# install wget unzip and dig plus python modules
 RUN set -ex \
     && apt-get update \
-    && apt-get install -y wget unzip dnsutils \
+    && apt-get install -y wget unzip dnsutils python-dev gcc \
+    && wget --quiet -O /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
+    && python /tmp/get-pip.py \
+    && pip install \
+       python-Consul==0.7.2 \
+       manta==2.6.0 \
+    && rm /tmp/get-pip.py \
     && rm -rf /var/lib/apt/lists/*
 
 # Install Consul
@@ -43,8 +49,8 @@ RUN export CONTAINERPILOT_CHECKSUM=8d680939a8a5c8b27e764d55a78f5e3ae7b42ef4 \
 
 COPY etc/containerpilot.json5 /etc/containerpilot.json5
 
-COPY etc/preStart.sh /etc/preStart.sh
-COPY etc/onChange.sh /etc/onChange.sh
+COPY etc/preStart.py /etc/preStart.py
+COPY etc/onChange.py /etc/onChange.py
 COPY etc/cassandra.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
 
 ### Cassandra-specific setup follows
@@ -63,7 +69,7 @@ COPY etc/jmxremote.access /etc/cassandra/jmxremote.access
 
 RUN chown cassandra:cassandra /etc/cassandra/jmxremote.password /etc/cassandra/jmxremote.access \
     && chmod 400 /etc/cassandra/jmxremote.access /etc/cassandra/jmxremote.password \
-    && chmod +x /etc/preStart.sh /etc/onChange.sh
+    && chmod +x /etc/preStart.py /etc/onChange.py
 
 EXPOSE 7000 7001 7199 9042 9160
 
diff --git a/README.md b/README.md
index 4b9c17d..94db97a 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,24 @@
 # Autopilot Pattern Cassandra
 
+A blueprint for running Apache Cassandra using the [Autopilot Pattern](http://autopilotpattern.io/).
+
+Environment variables:
+
+  - `CASSANDRA_CLUSTER_NAME`: Name of cluster. Cassandra instances can only join a cluster with the
+  same `CASSANDRA_CLUSTER_NAME`. Changing this to something other than "Test Cluster"
+  is **strongly recommended**.
+  - `CASSANDRA_USER`: New user account to create upon cluster initialization. Setting this parameter
+  to something other than "cassandra" is **strongly recommended**.
+  - `CASSANDRA_PASSWORD`: password for `CASSANDRA_USER`.
+  - `CASSANDRA_KEYSPACES`: Comma-separated list of keyspaces.
+  - `CASSANDRA_TOPOLOGY`: JSON map describing keyspaces and their respective datacenters and
+    replication factors, e.g. for a cluster deployed with at least 2 nodes in `us-east-1` and
+    `us-sw-1` Triton datacenters having a single keyspace named `demo`:
+    ```
+    { "demo": { "us-sw-1": 2, "us-east-1": 2 } }
+    ```
+
+Notes:
 
 ```
 cd examples/compose
diff --git a/etc/containerpilot.json5 b/etc/containerpilot.json5
index 4148ad1..a3537b6 100644
--- a/etc/containerpilot.json5
+++ b/etc/containerpilot.json5
@@ -27,7 +27,7 @@
     {
       // without a "when" field this will start first
       name: "preStart",
-      exec: "sh /etc/preStart.sh"
+      exec: "python /etc/preStart.py"
     },
     {
       name: "cassandra",
@@ -35,7 +35,7 @@
       restarts: "unlimited",
       port: 9042,
       health: {
-        exec: "nodetool -u cassandra -pw cassandra status",
+        exec: "nodetool -u {{ .CASSANDRA_USER }} -pw {{ .CASSANDRA_PASSWORD }} status",
         interval: 5,
         ttl: 15,
         timeout: "10s"
@@ -47,7 +47,7 @@
     },
     {
       name: 'onChange-cassandra',
-      exec: 'sh /etc/onChange.sh',
+      exec: 'python /etc/onChange.py',
       when: {
         source: 'watch.cassandra',
         each: 'changed'
diff --git a/etc/onChange.py b/etc/onChange.py
new file mode 100644
index 0000000..57df349
--- /dev/null
+++ b/etc/onChange.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+from sys import stderr
+from os import environ, listdir
+from subprocess import check_output, Popen, PIPE, CalledProcessError
+
+class termcolor:
+  HEADER = '\033[95m'
+  OKBLUE = '\033[94m'
+  OKGREEN = '\033[92m'
+  WARNING = '\033[93m'
+  FAIL = '\033[91m'
+  END = '\033[0m'
+
+def die(err):
+  if isinstance(err, CalledProcessError):
+    msg = err.output
+  else
+    msg = err
+
+  print >> stderr, termcolor.FAIL + err + termcolor.END
+  exit(1)
+
+if 'CASSANDRA_USER' not in environ or 'CASSANDRA_PASSWORD' not in environ:
+  print >> stderr, '{} environment is missing CASSANDRA_USER or CASSANDRA_PASSWORD! {}'.format(termcolor.FAIL, termcolor.END)
+  exit(1)
+
+CASSANDRA_USER = environ["CASSANDRA_USER"]
+CASSANDRA_PASSWORD = environ["CASSANDRA_PASSWORD"]
+
+status = "UNKNOWN"
+try:
+  status = check_output([
+    "nodetool", "-u", CASSANDRA_USER, "-pw", CASSANDRA_PASSWORD, "status"])
+except CalledProcessError as e:
+    die(e)
+
+print "node status: {} {} {}".format(termcolor.OKGREEN, status, termcolor.END)
+
+try:
+  check_call([
+    "consul-template", "-once", "-template", "/etc/cassandra/cassandra.yaml.ctmpl:/etc/cassandra/cassandra.yaml"])
+except CalledProcessError as e:
+    die(e)
+
+print "template ", template
+
+# calling stopdaemon brings down cassandra.
+# it should be brought back up by containerpilot and read the new cassandra.yml
+stopdaemon = check_output([
+  "nodetool", "-u", "cassandra", "-pw", "cassandra", "stopdaemon"])
+
+print "stopdaemon result: ", stopdaemon
+
+# TODO: dont stopdaemon unless there are enough other nodes available
+# if [ $((`nodetool -u cassandra -pw cassandra status | grep -v $HOSTNAME | grep UN | wc -l`)) -gt 1 ]; then
+#   nodetool -u cassandra -pw cassandra stopdaemon
+# fi
+
diff --git a/etc/onChange.sh b/etc/onChange.sh
deleted file mode 100644
index 30aae36..0000000
--- a/etc/onChange.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!bin/sh
-consul-template -once -template /etc/cassandra/cassandra.yaml.ctmpl:/etc/cassandra/cassandra.yaml
-
-nodetool -u cassandra -pw cassandra stopdaemon
-
-# TODO: dont stopdaemon unless there are enough other nodes available
-# if [ $((`nodetool -u cassandra -pw cassandra status | grep -v $HOSTNAME | grep UN | wc -l`)) -gt 1 ]; then
-#   nodetool -u cassandra -pw cassandra stopdaemon
-# fi
-
diff --git a/etc/preStart.py b/etc/preStart.py
new file mode 100644
index 0000000..21ab3b1
--- /dev/null
+++ b/etc/preStart.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python
+print "cassandra-preStart complete"
diff --git a/etc/preStart.sh b/etc/preStart.sh
deleted file mode 100644
index f23adee..0000000
--- a/etc/preStart.sh
+++ /dev/null
@@ -1 +0,0 @@
-echo preStart-cassandra fired
diff --git a/examples/compose/docker-compose.yml b/examples/compose/local-compose.yml
similarity index 71%
rename from examples/compose/docker-compose.yml
rename to examples/compose/local-compose.yml
index 827325d..693dbac 100644
--- a/examples/compose/docker-compose.yml
+++ b/examples/compose/local-compose.yml
@@ -3,15 +3,17 @@ version: '2.1'
 
 services:
   cassandra:
-    build: ../../
     image: autopilotpattern/cassandra:latest
-    mem_limit: 1g
     restart: always
-    dns:
-      - 127.0.0.1
+    mem_limit: 2g
+    dns: 127.0.0.1
     environment:
       - CONSUL=consul
-      - CONSUL_AGENT=1
+      - CASSANDRA_USER=cassandra
+      - CASSANDRA_PASSWORD=cassandra
+      - CASSANDRA_CLUSTER_NAME=Test Cluster
+      - CASSANDRA_KEYSPACES=demo
+      - CASSANDRA_TOPOLOGY={"demo":{"datacenter1":1}}
     links:
       - consul:consul
 
@@ -23,8 +25,7 @@ services:
     mem_limit: 128m
     ports:
       - 8500:8500
-    dns:
-      - 127.0.0.1
+    dns: 127.0.0.1
     command: >
       /usr/local/bin/containerpilot
       /bin/consul agent -server
diff --git a/examples/triton/docker-compose.yaml b/examples/triton/docker-compose.yaml
new file mode 100644
index 0000000..62bc8a6
--- /dev/null
+++ b/examples/triton/docker-compose.yaml
@@ -0,0 +1,39 @@
+version: '2.1'
+# Cassandra demonstration of the Autopilot pattern
+
+services:
+  cassandra:
+    image: autopilotpattern/cassandra:latest
+    restart: always
+    mem_limit: 4g
+    network_mode: bridge
+    labels:
+      - triton.cns.services=cassandra
+    environment:
+      - CONSUL=cassandra-consul.svc.${TRITON_CNS_SEARCH_DOMAIN_PRIVATE}
+      - CASSANDRA_USER=cassandra
+      - CASSANDRA_PASSWORD=cassandra
+      - CASSANDRA_CLUSTER_NAME=Test Cluster
+      - CASSANDRA_KEYSPACES=demo
+      - CASSANDRA_TOPOLOGY={"demo":{"datacenter1":1}}
+    links:
+      - consul:consul
+
+  # Start with a single host which will bootstrap the cluster.
+  # In production we'll want to use an HA cluster.
+  consul:
+    image: autopilotpattern/consul:0.7.2-r0.8
+    restart: always
+    mem_limit: 128m
+    network_mode: bridge
+    ports:
+      - 8500:8500
+    labels:
+      - triton.cns.services=cassandra-consul
+    command: >
+      /usr/local/bin/containerpilot
+      /bin/consul agent -server
+        -config-dir=/etc/consul
+        -log-level=err
+        -bootstrap-expect 1
+        -ui-dir /ui
diff --git a/examples/triton/setup.sh b/examples/triton/setup.sh
new file mode 100644
index 0000000..a645eb3
--- /dev/null
+++ b/examples/triton/setup.sh
@@ -0,0 +1 @@
+setup.sh
\ No newline at end of file
diff --git a/makefile b/makefile
index de8475a..7b47f07 100644
--- a/makefile
+++ b/makefile
@@ -1,24 +1,30 @@
+DC := docker-compose -p autopilotpattern -f examples/compose/local-compose.yml
+
+.PHONY: build up restart-cassandra consul down ps cqlsh pyrepl bash # command targets, never files; '*' would glob to directory entries instead
+
 build:
-	docker-compose \
-		-p autopilotpattern \
-		-f examples/compose/docker-compose.yml \
-		build cassandra
+	$(DC) build cassandra
 
 up:
-	docker-compose \
-		-p autopilotpattern \
-		-f examples/compose/docker-compose.yml \
-		up
+	$(DC) up
+
+restart-cassandra:
+	$(DC) stop cassandra && $(DC) rm -vf cassandra && $(DC) build cassandra && $(DC) up cassandra
+
+consul:
+	$(DC) up -d consul
 
 down:
-	docker-compose \
-		-p autopilotpattern \
-		-f examples/compose/docker-compose.yml \
-		down --remove-orphans -v
+	$(DC) down --remove-orphans -v
 
 ps:
-	docker-compose \
-		-p autopilotpattern \
-		-f examples/compose/docker-compose.yml \
-		ps
+	$(DC) ps
+
+cqlsh:
+	$(DC) exec cassandra cqlsh
+
+pyrepl:
+	$(DC) exec cassandra env PYTHONSTARTUP=/.pythonrc python
 
+bash:
+	$(DC) exec cassandra bash

From faf4bb05ecd8bcf3e5f8bf57e4c7c13b9898ee04 Mon Sep 17 00:00:00 2001
From: Tomas Celaya <tjcelaya@gmail.com>
Date: Thu, 7 Dec 2017 18:07:45 -0800
Subject: [PATCH 3/7] Multi-DC peering almost working

---
 .gitignore                              |   3 +
 Dockerfile                              |  35 +++---
 etc/cassandra.yaml.ctmpl                |   4 +-
 etc/containerpilot.json5                |  18 ++-
 etc/containerpilot_handler.py           | 104 +++++++++++++++
 etc/containerpilot_handler/__init__.py  |   0
 etc/containerpilot_handler/cassandra.py | 161 ++++++++++++++++++++++++
 etc/containerpilot_handler/storage.py   |  32 +++++
 etc/containerpilot_handler/utils.py     |  89 +++++++++++++
 etc/onChange.py                         |  58 ---------
 etc/preStart.py                         |   2 -
 examples/compose/local-compose.yml      |  52 +++++---
 examples/triton/setup.sh                |   1 -
 13 files changed, 453 insertions(+), 106 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 etc/containerpilot_handler.py
 create mode 100644 etc/containerpilot_handler/__init__.py
 create mode 100644 etc/containerpilot_handler/cassandra.py
 create mode 100644 etc/containerpilot_handler/storage.py
 create mode 100644 etc/containerpilot_handler/utils.py
 delete mode 100644 etc/onChange.py
 delete mode 100644 etc/preStart.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d4a80c8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+*.pyc
+
diff --git a/Dockerfile b/Dockerfile
index b0079f2..85a28ff 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,12 +3,16 @@ FROM cassandra:3.11.0
 # install wget unzip and dig plus python modules
 RUN set -ex \
     && apt-get update \
-    && apt-get install -y wget unzip dnsutils python-dev gcc \
+    && apt-get install --no-install-recommends -y wget unzip dnsutils python-dev gcc \
     && wget --quiet -O /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
     && python /tmp/get-pip.py \
     && pip install \
-       python-Consul==0.7.2 \
-       manta==2.6.0 \
+        # trying to use cqlsh to do this stuff, installing cassandra-driver takes _forever_
+        # cassandra-driver==3.12.0 \
+        python-Consul==0.7.2 \
+        manta==2.6.0 \
+        pyyaml==3.12 \
+    && apt-get purge -y python-dev gcc \
     && rm /tmp/get-pip.py \
     && rm -rf /var/lib/apt/lists/*
 
@@ -49,27 +53,18 @@ RUN export CONTAINERPILOT_CHECKSUM=8d680939a8a5c8b27e764d55a78f5e3ae7b42ef4 \
 
 COPY etc/containerpilot.json5 /etc/containerpilot.json5
 
-COPY etc/preStart.py /etc/preStart.py
-COPY etc/onChange.py /etc/onChange.py
-COPY etc/cassandra.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
-
 ### Cassandra-specific setup follows
 
-ENV LOCAL_JMX=no
-
-# only the access line actually seems to do anything
-RUN echo 'if [ "$LOCAL_JMX" = "no" ]; then' "\n" \
-           'JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true"' "\n" \
-           'JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password"' "\n" \
-           'JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access"' "\n" \
-         'fi' "\n" >> /etc/cassandra/cassandra-env.sh
+COPY etc/containerpilot_handler /usr/local/bin/containerpilot_handler
+COPY etc/containerpilot_handler.py /usr/local/bin/containerpilot_handler.py
+COPY etc/cassandra.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
 
-COPY etc/jmxremote.password /etc/cassandra/jmxremote.password
-COPY etc/jmxremote.access /etc/cassandra/jmxremote.access
+# disable the automatic seed configuration that enables single-node bootstrapping
+# the first line corresponds to "always set self as seed"
+# the second line actually inserts CASSANDRA_SEEDS into the cassandra.yaml
+RUN sed -ri '/CASSANDRA_SEEDS.*CASSANDRA_BROADCAST_ADDRESS/d' /docker-entrypoint.sh && \
+    sed -ri '/sed -ri.*CASSANDRA_SEEDS.*\/cassandra.yaml/d' /docker-entrypoint.sh
 
-RUN chown cassandra:cassandra /etc/cassandra/jmxremote.password /etc/cassandra/jmxremote.access \
-    && chmod 400 /etc/cassandra/jmxremote.access /etc/cassandra/jmxremote.password \
-    && chmod +x /etc/preStart.py /etc/onChange.py
 
 EXPOSE 7000 7001 7199 9042 9160
 
diff --git a/etc/cassandra.yaml.ctmpl b/etc/cassandra.yaml.ctmpl
index 8308595..bebfe7a 100644
--- a/etc/cassandra.yaml.ctmpl
+++ b/etc/cassandra.yaml.ctmpl
@@ -421,7 +421,9 @@ seed_provider:
       parameters:
           # seeds is actually a comma-delimited list of addresses.
           # Ex: "<ip1>,<ip2>,<ip3>"
-          - seeds: {{ range service "cassandra" }}{{ .Address }},{{ end }}
+          - seeds: {{ range datacenters }}{{ $seedKey := printf "cassandra-seeds@%s" . }}{{ if keyExists $seedKey }}{{ key $seedKey }},{{ end }}{{ end }}
+
+# {{ range datacenters }}{{ $seedKey := printf "cassandra-seeds-%s" . }}{{ $seedKey }}:{{ if keyExists $seedKey }}{{ key $seedKey }},{{ end }}{{ end }}
 
 # For workloads with more data than can fit in memory, Cassandra's
 # bottleneck will be reads that need to fetch data from
diff --git a/etc/containerpilot.json5 b/etc/containerpilot.json5
index a3537b6..cfcb37e 100644
--- a/etc/containerpilot.json5
+++ b/etc/containerpilot.json5
@@ -1,11 +1,10 @@
 {
-  consul: '{{ if .CONSUL_AGENT }}localhost{{ else }}{{ .CONSUL | default "consul"}}{{ end }}:8500',
+  consul: 'localhost:8500',
   logging: {
     level: "DEBUG",
-    format: "text"
+    format: "default"
   },
   jobs: [
-    {{ if .CONSUL_AGENT }}
     {
       name: 'consul-agent',
       exec: ['/usr/local/bin/consul', 'agent',
@@ -23,11 +22,10 @@
       },
       restarts: 'unlimited'
     },
-    {{ end }}
     {
       // without a "when" field this will start first
-      name: "preStart",
-      exec: "python /etc/preStart.py"
+      name: 'cassandra-preStart',
+      exec: ['python', '/usr/local/bin/containerpilot_handler.py', 'preStart'],
     },
     {
       name: "cassandra",
@@ -41,13 +39,13 @@
         timeout: "10s"
       },
       when: {
-        source: "preStart",
-        once: "exitSuccess",
+        source: 'cassandra-preStart',
+        once: 'exitSuccess',
       },
     },
     {
-      name: 'onChange-cassandra',
-      exec: 'python /etc/onChange.py',
+      name: 'cassandra-onChange',
+      exec: ['python', '/usr/local/bin/containerpilot_handler.py', 'onChange'],
       when: {
         source: 'watch.cassandra',
         each: 'changed'
diff --git a/etc/containerpilot_handler.py b/etc/containerpilot_handler.py
new file mode 100644
index 0000000..8de03ee
--- /dev/null
+++ b/etc/containerpilot_handler.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+from __future__ import print_function
+from sys import argv, stderr, path
+from consul import Consul
+from socket import gethostname
+from os.path import exists, isdir
+
+from containerpilot_handler.cassandra import Cassandra
+from containerpilot_handler.utils import resolve_home, resolve_cluster_name, resolve_credentials, resolve_datacenter, resolve_storage, await_leader, log
+
+
+def main(args):
+  log('containerpilot_handler started')
+
+  CASSANDRA_HOME = resolve_home()
+  CASSANDRA_USER, CASSANDRA_PASSWORD = resolve_credentials()
+  consul = Consul()
+  CASSANDRA_DC = resolve_datacenter(consul)
+
+  storage = resolve_storage()
+
+  await_leader(consul)
+
+  node = Cassandra(consul, storage, CASSANDRA_HOME, CASSANDRA_USER, CASSANDRA_PASSWORD, CASSANDRA_DC, resolve_cluster_name())
+
+  log('node configuration: {}'.format(node))
+
+
+  current_seeds = node.query_seeds()
+
+  if not node.enough_seeds_exist(current_seeds) and not node.already_registered_as_seed(current_seeds):
+    log('volunteering as seed node')
+    node.register_as_seed(current_seeds)
+
+
+  if 'preStart' in args:
+    # render our template in case there are existing seeds
+    log('rendering configuration during preStart')
+
+    # attempting to render the config immediately can result in our own volunteering being omitted
+    # TODO: figure out what consul-template config would work like this (and not block indefinitely)
+    from time import sleep
+    sleep(1)
+
+    node.render_config()
+
+    # create a lock to track our snapshots
+    if node.storage is None:
+      log('no storage configured, skipping snapshots')
+    else:
+      _, snapshot_lock = consul.kv.get(node.build_snapshot_key())
+      if snapshot_lock is None:
+        log('FLAG_SNAPSHOT_REQUIRED was missing')
+        snapshot_set = consul.kv.put(node.build_snapshot_key(), Cassandra.FLAG_SNAPSHOT_REQUIRED, acquire=node.session_id)
+        if not snapshot_set:
+          raise ValueError('error occurred while setting FLAG_SNAPSHOT_REQUIRED')
+        else:
+          log('FLAG_SNAPSHOT_REQUIRED recorded for {}'.format(node.id))
+
+    log('preStart complete')
+    return
+
+  if 'onChange' in args:
+
+    if node.storage is None:
+      log('no storage configured, skipping snapshots')
+    else:
+      tag = node.record_snapshots()
+      snapshot_dirs = ['{}/data/{}/snapshots/{}'.format(node.home, k, tag) for k in node.list_keyspaces()]
+      
+      log('snapshot dirs: {}'.format(str(snapshot_dirs)))
+
+      for d in snapshot_dirs:
+        if not isdir(d):
+          log('snapshot directory missing: {}'.format(d))
+
+    # snapshot_state = node.query_snapshot_state()
+    # log('snapshot state: {}'.format(snapshot_state))
+
+    # if snapshot_state is None or snapshot_state == Cassandra.FLAG_SNAPSHOT_REQUIRED:
+    #   log('recording snapshot')
+    #   # node.record_snapshots()
+    #   # node.ship_snapshots()
+
+    log('onChange complete')
+    return
+
+
+# snapshot_available = node.query_snapshot_state()
+
+
+# current_peers = node.query_peers()
+
+
+# # TODO: dont stopdaemon unless there are enough other nodes available
+# # if [ $((`nodetool -u cassandra -pw cassandra status | grep -v $HOSTNAME | grep UN | wc -l`)) -gt 1 ]; then
+# #   nodetool -u cassandra -pw cassandra stopdaemon
+# # fi
+
+# https://stackoverflow.com/questions/6323860/sibling-package-imports
+# Ugly hack to allow absolute import from the root folder
+# whatever its name is. Please forgive the heresy.
+if __name__ == "__main__":
+  main(argv[1:])
\ No newline at end of file
diff --git a/etc/containerpilot_handler/__init__.py b/etc/containerpilot_handler/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/etc/containerpilot_handler/cassandra.py b/etc/containerpilot_handler/cassandra.py
new file mode 100644
index 0000000..5ed4f86
--- /dev/null
+++ b/etc/containerpilot_handler/cassandra.py
@@ -0,0 +1,161 @@
+from __future__ import print_function
+from os import listdir
+from sys import stderr
+from socket import gethostname, gethostbyname
+from datetime import datetime
+from containerpilot_handler.utils import log
+import yaml
+from subprocess import check_output, check_call, STDOUT, PIPE, CalledProcessError
+
+class Cassandra(object):
+
+  FLAG_SNAPSHOT_REQUIRED = 'SNAPSHOT_REQUIRED'
+
+  def __init__(self, consul, storage, home, user, password, datacenter, cluster_name):
+    self.id = "cassandra-{}".format(gethostname())
+    self.consul = consul
+    self.storage = storage
+    self.home = home
+    self.user = user
+    self.password = password
+    self.datacenter = datacenter
+    self.cluster_name = cluster_name
+
+    self.session_id = self.consul.session.create(self.id, behavior='delete', ttl=120)
+
+  def __str__(self):
+    return 'Cassandra <id={}, consul={}, storage={}, user={}, datacenter={}, cluster_name={} session_id={}>'.format(
+        self.id, self.consul, self.storage, self.user, self.datacenter, self.cluster_name, self.session_id)
+    
+  def build_seeds_key(self):
+    return 'cassandra-seeds-{}-{}'.format(self.cluster_name, self.datacenter)
+
+  def build_snapshot_key(self):
+    return 'cassandra-snapshot-{}-{}'.format(self.datacenter, gethostname())
+
+  def query_snapshot_state(self):
+    _, snapshot = self.consul.kv.get(self.build_snapshot_key())
+
+    if snapshot is None:
+      return None
+
+    return snapshot['Value']
+
+  def query_seeds(self):
+    _, seeds = self.consul.kv.get(self.build_seeds_key())
+    if seeds is None:
+      return None
+
+    return seeds['Value'].split(',')
+
+  def read_saved_seeds(self, should_retry=True):  # parse the rendered cassandra.yaml; re-render once (should_retry) if it loads as empty
+    loaded_conf = None
+    seed_list = []
+    with open('/etc/cassandra/cassandra.yaml', 'r') as conf:
+      try:
+        loaded_conf = yaml.load(conf)
+      except yaml.YAMLError as exc:  # only `yaml` is imported, so the bare name YAMLError was a NameError
+        log('error occurred while reading cassandra configuration: {}'.format(exc))  # log() accepts a single string
+        return []
+
+    if loaded_conf is None:
+      if should_retry:
+        log('our configuration file was missing!?')
+        self.render_config()
+        return self.read_saved_seeds(should_retry=False)
+      else:
+        log('config file missing after retry! raising error')
+        raise ValueError('failed to render and load configuration')
+
+    try:
+      seed_list = loaded_conf['seed_provider'][0]['parameters'][0]  # NOTE(review): this is the whole first parameters entry, not a split seed list - confirm what callers expect
+    except (KeyError, IndexError, TypeError) as e:  # missing key, empty list, or non-mapping config all fall back to []
+      log('error occurred while accessing configuration file seed list: {}'.format(e))
+
+    return seed_list
+
+  def enough_seeds_exist(self, seeds):
+    if seeds is None:
+      return False
+
+    parsed_seeds = [s.strip() for s in seeds]
+    return 1 < len(parsed_seeds)
+
+  def already_registered_as_seed(self, seeds):
+    if seeds is None:
+      return False
+
+    return gethostbyname(gethostname()) in [s.strip() for s in seeds]
+
+  def register_as_seed(self, seeds):
+    if seeds is None:
+      seeds = []
+
+    own_ip = gethostbyname(gethostname())
+
+    seeds.append(own_ip)
+
+    registered = self.consul.kv.put(self.build_seeds_key(), ','.join(seeds))
+    if not registered:
+      raise ValueError('Failed to register ourselves as a seed')
+
+  def render_config(self):
+    log('SO CLOSE')
+    check_call([
+      'consul-template', '-once', '-template', '/etc/cassandra/cassandra.yaml.ctmpl:/etc/cassandra/cassandra.yaml'])
+    log('template rendered to: {}'.format('/etc/cassandra/cassandra.yaml'))
+
+  def record_snapshots(self):
+    """
+    records a snapshot of all keyspaces tagged with the current UTC datetime
+    :return the snapshot tag, as ISO8601 UTC (condensed) datetime
+    """
+    if self.storage is None:
+      return None
+
+    dt = datetime.utcnow().strftime('%Y%m%dT%H%M%S')
+
+    try:
+      snapshot = check_output([
+        'nodetool', '-u', self.user, '-pw', self.password, 'snapshot', '--tag', dt],
+        stderr=STDOUT)
+      log('snapshot result: {}'.format(snapshot))
+    except CalledProcessError as e:
+      log('snapshot failed: {}'.format(e.output))
+
+
+    return dt
+
+  def ship_snapshots(self, snapshot_tag, keyspaces=None):
+
+    zipped_snapshot = None
+
+    if keyspaces is None:
+      keyspaces = self.list_keyspaces()
+
+    if 0 == len(keyspaces):
+      raise Exception('no keyspaces to search for snapshots')
+
+    log('shipping snapshots for keyspaces: {}'.format(str(keyspaces)))
+
+    # for ks in keyspaces:
+    #   self.home()
+
+  def list_keyspaces(self):  # whitespace-split output of cqlsh DESCRIBE KEYSPACES; raises Exception if cqlsh exits non-zero
+    keyspaces = []
+
+    try:
+      keyspace_output = check_output([
+        'cqlsh', '-u', self.user, '-p', self.password, '--no-color', '-e', 'DESCRIBE KEYSPACES'],  # cqlsh takes -p; '-pw' parses as password "w"
+        stderr=STDOUT)
+
+      keyspaces = keyspace_output.strip().split()
+    except CalledProcessError as e:
+      raise Exception('error occurred while listing keyspaces: {}'.format(e.output))
+
+    return keyspaces
+
+  def stop(self):
+    stopdaemon = check_output([
+      'nodetool', '-u', self.user, '-pw', self.password, 'stopdaemon'])
+    log('stopdaemon result: {}'.format(stopdaemon))
diff --git a/etc/containerpilot_handler/storage.py b/etc/containerpilot_handler/storage.py
new file mode 100644
index 0000000..36d6a3a
--- /dev/null
+++ b/etc/containerpilot_handler/storage.py
@@ -0,0 +1,32 @@
+from __future__ import print_function
+from abc import ABCMeta, abstractmethod
+
+class Storage(object):
+  __metaclass__ = ABCMeta
+  @abstractmethod
+  def store(self, path):
+    pass
+  @abstractmethod
+  def load(self, path):
+    pass
+
+class Local(Storage):
+  def __init__(self, base_path):
+    self.base_path = base_path
+
+  def store(self, path):
+    pass
+
+  def load(self, path):
+    pass
+
+class Manta(Storage):
+  def __init__(self, base_path, private_key_content):
+    self.base_path = base_path
+    self.private_key_content = private_key_content
+
+  def store(self, path):
+    pass
+
+  def load(self, path):
+    pass
\ No newline at end of file
diff --git a/etc/containerpilot_handler/utils.py b/etc/containerpilot_handler/utils.py
new file mode 100644
index 0000000..5f13090
--- /dev/null
+++ b/etc/containerpilot_handler/utils.py
@@ -0,0 +1,89 @@
+from __future__ import print_function
+from os import environ
+from os.path import exists
+from sys import stderr
+from time import sleep
+from urlparse import urlparse
+from consul import Consul, ConsulException
+from containerpilot_handler.storage import Local, Manta
+
+class termcolor:  # ANSI escape sequences used to colour the handler's output
+  HEADER = '\033[95m'
+  OKBLUE = '\033[94m'
+  OKGREEN = '\033[92m'
+  WARNING = '\033[93m'
+  FAIL = '\033[91m'
+  END = '\033[0m'  # resets the colour
+
+def log(s):  # print s on stdout in blue, prefixed with "HANDLER: "
+  print('HANDLER: {}{}{}'.format(termcolor.OKBLUE, s, termcolor.END))
+
+def die(err):
+  """Print err (or an exception's captured output) in red on stderr, then exit with status 1."""
+  if isinstance(err, Exception):
+    msg = getattr(err, 'output', None) or str(err)  # not every exception carries .output
+  else:
+    msg = str(err)
+  print(termcolor.FAIL + msg + termcolor.END, file=stderr)
+  exit(1)
+
+def resolve_home():
+  """Return CASSANDRA_HOME from the environment, falling back to /var/lib/cassandra."""
+  fallback = '/var/lib/cassandra'
+
+  return environ.get('CASSANDRA_HOME', fallback)
+
+def resolve_cluster_name():
+  """Return CASSANDRA_CLUSTER_NAME from the environment, falling back to 'test'."""
+  fallback = 'test'
+
+  return environ.get('CASSANDRA_CLUSTER_NAME', fallback)
+
+def resolve_credentials():
+  """Return (CASSANDRA_USER, CASSANDRA_PASSWORD) from the environment; die() if either is unset."""
+  if not ('CASSANDRA_USER' in environ and 'CASSANDRA_PASSWORD' in environ):
+    die('environment is missing CASSANDRA_USER or CASSANDRA_PASSWORD!')
+  return environ["CASSANDRA_USER"], environ["CASSANDRA_PASSWORD"]
+
+def resolve_datacenter(c):
+  """Return CASSANDRA_DC if set, else the datacenter the consul agent behind `c` reports (or None).
+
+  Raises ValueError when CASSANDRA_DC is unset and c is not a Consul instance.
+  """
+  if 'CASSANDRA_DC' in environ:
+    return environ['CASSANDRA_DC']
+  if not isinstance(c, Consul):
+    raise ValueError('unexpected type for consul instance when resolving datacenter: {}'.format(type(c)))
+
+  agent_info = c.agent.self()
+  has_datacenter = 'Config' in agent_info and 'Datacenter' in agent_info['Config']
+  return agent_info['Config']['Datacenter'] if has_datacenter else None
+
+def resolve_storage():
+  """Build the Storage backend named by the SNAPSHOT_TARGET URI, or None if unset/unrecognised."""
+  if 'SNAPSHOT_TARGET' not in environ:
+    return None
+
+  uri = urlparse(environ['SNAPSHOT_TARGET'])
+  if 'manta' in uri.scheme:
+    # NOTE(review): MANTA_PRIVATE_KEY is an assumed variable name -- confirm before relying on it
+    return Manta(uri.path, environ.get('MANTA_PRIVATE_KEY'))
+  if uri.scheme in ('file', 'local'):
+    return Local(uri.path)  # e.g. file:///tmp/snapshots -> /tmp/snapshots
+  return None
+
+def await_leader(consul, max_duration=10, attempts=6):
+  """Poll consul until it reports a raft leader; raise Exception if none appears in time.
+
+  Makes at least one and at most `attempts` tries, max_duration/attempts seconds apart.
+  """
+  attempts = max(attempts, 1)
+  for remaining in range(attempts, 0, -1):
+    try:
+      if consul.status.leader() != '':
+        return
+    except ConsulException:
+      log('no leader elected yet')  # log() takes only the message, no file= keyword
+    if 1 < remaining:
+      sleep(float(max_duration) / attempts)  # fixed interval; float() avoids py2 floor division
+  raise Exception('no leader elected in time')
diff --git a/etc/onChange.py b/etc/onChange.py
deleted file mode 100644
index 57df349..0000000
--- a/etc/onChange.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-from sys import stderr
-from os import environ, listdir
-from subprocess import check_output, Popen, PIPE, CalledProcessError
-
-class termcolor:
-  HEADER = '\033[95m'
-  OKBLUE = '\033[94m'
-  OKGREEN = '\033[92m'
-  WARNING = '\033[93m'
-  FAIL = '\033[91m'
-  END = '\033[0m'
-
-def die(err):
-  if isinstance(err, CalledProcessError):
-    msg = err.output
-  else
-    msg = err
-
-  print >> stderr, termcolor.FAIL + err + termcolor.END
-  exit(1)
-
-if 'CASSANDRA_USER' not in environ or 'CASSANDRA_PASSWORD' not in environ:
-  print >> stderr, '{} environment is missing CASSANDRA_USER or CASSANDRA_PASSWORD! {}'.format(termcolor.FAIL, termcolor.END)
-  exit(1)
-
-CASSANDRA_USER = environ["CASSANDRA_USER"]
-CASSANDRA_PASSWORD = environ["CASSANDRA_PASSWORD"]
-
-status = "UNKNOWN"
-try:
-  status = check_output([
-    "nodetool", "-u", CASSANDRA_USER, "-pw", CASSANDRA_PASSWORD, "status"])
-except CalledProcessError as e:
-    die(e)
-
-print "node status: {} {} {}".format(termcolor.OKGREEN, status, termcolor.END)
-
-try:
-  check_call([
-    "consul-template", "-once", "-template", "/etc/cassandra/cassandra.yaml.ctmpl:/etc/cassandra/cassandra.yaml"])
-except CalledProcessError as e:
-    die(e)
-
-print "template ", template
-
-# calling stopdaemon brings down cassandra.
-# it should be brought back up by containerpilot and read the new cassandra.yml
-stopdaemon = check_output([
-  "nodetool", "-u", "cassandra", "-pw", "cassandra", "stopdaemon"])
-
-print "stopdaemon result: ", stopdaemon
-
-# TODO: dont stopdaemon unless there are enough other nodes available
-# if [ $((`nodetool -u cassandra -pw cassandra status | grep -v $HOSTNAME | grep UN | wc -l`)) -gt 1 ]; then
-#   nodetool -u cassandra -pw cassandra stopdaemon
-# fi
-
diff --git a/etc/preStart.py b/etc/preStart.py
deleted file mode 100644
index 21ab3b1..0000000
--- a/etc/preStart.py
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env python
-print "cassandra-preStart complete"
diff --git a/examples/compose/local-compose.yml b/examples/compose/local-compose.yml
index 693dbac..7c860d2 100644
--- a/examples/compose/local-compose.yml
+++ b/examples/compose/local-compose.yml
@@ -3,33 +3,57 @@ version: '2.1'
 
 services:
   cassandra:
+    build: ../../
     image: autopilotpattern/cassandra:latest
     restart: always
     mem_limit: 2g
     dns: 127.0.0.1
+    # uncomment the following lines for more rapid development
+    volumes:
+      - ../../etc/containerpilot_handler:/usr/local/bin/containerpilot_handler
+      - ../../etc/containerpilot_handler.py:/usr/local/bin/containerpilot_handler.py
+      - ../../etc/cassandra.yaml.ctmpl:/etc/cassandra/cassandra.yaml.ctmpl
+      - ../../tmp:/tmp/snapshots
     environment:
-      - CONSUL=consul
+      - CONSUL=consuldc1
       - CASSANDRA_USER=cassandra
       - CASSANDRA_PASSWORD=cassandra
-      - CASSANDRA_CLUSTER_NAME=Test Cluster
+      - CASSANDRA_CLUSTER_NAME=demo
       - CASSANDRA_KEYSPACES=demo
       - CASSANDRA_TOPOLOGY={"demo":{"datacenter1":1}}
+      - CASSANDRA_ENDPOINT_SNITCH=SimpleSnitch
+      - SNAPSHOT_TARGET=file:///tmp/snapshots
+      # the following options pertain to multi-datacenter deployments
+      # - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch
+      # - CASSANDRA_DC=
+      # - CASSANDRA_RACK=
     links:
-      - consul:consul
+      - consuldc1:consul
 
-  # Start with a single host which will bootstrap the cluster.
-  # In production we'll want to use an HA cluster.
-  consul:
-    image: autopilotpattern/consul:0.7.2-r0.8
+  consuldc1:
+    build: .
+    image: autopilotpattern/consul:${TAG:-latest}
     restart: always
     mem_limit: 128m
     ports:
-      - 8500:8500
-    dns: 127.0.0.1
+        - 8500
+    environment:
+      - CONSUL=consuldc1
+      - CONSUL_DATACENTER_NAME=dc1
+    command: >
+      /usr/local/bin/containerpilot
+
+  consuldc2:
+    image: autopilotpattern/consul:${TAG:-latest}
+    restart: always
+    mem_limit: 128m
+    ports:
+        - 8500
+    environment:
+      - CONSUL=consuldc2
+      - CONSUL_DATACENTER_NAME=dc2
+      - CONSUL_RETRY_JOIN_WAN="consuldc1"
     command: >
       /usr/local/bin/containerpilot
-      /bin/consul agent -server
-        -config-dir=/etc/consul
-        -log-level=err
-        -bootstrap-expect 1
-        -ui-dir /ui
+    links:
+      - consuldc1
diff --git a/examples/triton/setup.sh b/examples/triton/setup.sh
index a645eb3..e69de29 100644
--- a/examples/triton/setup.sh
+++ b/examples/triton/setup.sh
@@ -1 +0,0 @@
-setup.sh
\ No newline at end of file

From da0e66ba2b3eab58910407c8f10b5f5fe9a7d0cb Mon Sep 17 00:00:00 2001
From: Tomas Celaya <tjcelaya@gmail.com>
Date: Thu, 7 Dec 2017 18:22:58 -0800
Subject: [PATCH 4/7] Fix seeds key format

---
 etc/cassandra.yaml.ctmpl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/etc/cassandra.yaml.ctmpl b/etc/cassandra.yaml.ctmpl
index bebfe7a..e24c53c 100644
--- a/etc/cassandra.yaml.ctmpl
+++ b/etc/cassandra.yaml.ctmpl
@@ -421,9 +421,10 @@ seed_provider:
       parameters:
           # seeds is actually a comma-delimited list of addresses.
           # Ex: "<ip1>,<ip2>,<ip3>"
-          - seeds: {{ range datacenters }}{{ $seedKey := printf "cassandra-seeds@%s" . }}{{ if keyExists $seedKey }}{{ key $seedKey }},{{ end }}{{ end }}
+          {{ $clusterName := env "CASSANDRA_CLUSTER_NAME" }}
+          - seeds: {{ range datacenters }}{{ $seedKey := printf "cassandra-seeds-%s-%s@%s" $clusterName . . }}{{ if keyExists $seedKey }}{{ key $seedKey }},{{ end }}{{ end }}
 
-# {{ range datacenters }}{{ $seedKey := printf "cassandra-seeds-%s" . }}{{ $seedKey }}:{{ if keyExists $seedKey }}{{ key $seedKey }},{{ end }}{{ end }}
+# {{ range datacenters }}{{ $seedKey := printf "cassandra-seeds-%s-%s@%s" $clusterName . . }}{{ if keyExists $seedKey }}{{ key $seedKey }},{{ end }}{{ end }}
 
 # For workloads with more data than can fit in memory, Cassandra's
 # bottleneck will be reads that need to fetch data from

From 156ac19c9e5a829398eef627084e6e0749120819 Mon Sep 17 00:00:00 2001
From: Tomas Celaya <tjcelaya@gmail.com>
Date: Fri, 8 Dec 2017 16:52:40 -0800
Subject: [PATCH 5/7] Preventing every node from bootstrapping itself. One node
 can grab the seeds lock at a time and all others will render their configs to
 include the seed node (even if it is still bootstrapping)

---
 Dockerfile                              |  4 +--
 etc/containerpilot_handler.py           | 21 +++++++++++-----
 etc/containerpilot_handler/cassandra.py | 33 ++++++++++++++++++++-----
 makefile                                | 11 +++++++--
 4 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 85a28ff..65a1125 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -41,9 +41,9 @@ RUN set -ex \
 
 # Add Containerpilot and set its configuration
 ENV CONTAINERPILOT /etc/containerpilot.json5
-ENV CONTAINERPILOT_VERSION 3.3.3
+ENV CONTAINERPILOT_VERSION 3.6.1
 
-RUN export CONTAINERPILOT_CHECKSUM=8d680939a8a5c8b27e764d55a78f5e3ae7b42ef4 \
+RUN export CONTAINERPILOT_CHECKSUM=57857530356708e9e8672d133b3126511fb785ab \
     && export archive=containerpilot-${CONTAINERPILOT_VERSION}.tar.gz \
     && wget --quiet -O /tmp/${archive} \
          "https://github.com/joyent/containerpilot/releases/download/${CONTAINERPILOT_VERSION}/${archive}" \
diff --git a/etc/containerpilot_handler.py b/etc/containerpilot_handler.py
index 8de03ee..59356c9 100644
--- a/etc/containerpilot_handler.py
+++ b/etc/containerpilot_handler.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 from __future__ import print_function
+from time import sleep
 from sys import argv, stderr, path
 from consul import Consul
 from socket import gethostname
@@ -10,7 +11,7 @@
 
 
 def main(args):
-  log('containerpilot_handler started')
+  log('containerpilot_handler started: {}'.format(str(args)))
 
   CASSANDRA_HOME = resolve_home()
   CASSANDRA_USER, CASSANDRA_PASSWORD = resolve_credentials()
@@ -25,21 +26,29 @@ def main(args):
 
   log('node configuration: {}'.format(node))
 
-
   current_seeds = node.query_seeds()
 
-  if not node.enough_seeds_exist(current_seeds) and not node.already_registered_as_seed(current_seeds):
-    log('volunteering as seed node')
-    node.register_as_seed(current_seeds)
+  if 'fakeBoot' in args:
+    log('pretending to boot')
+    sleep(10)
+    log('fakeBoot complete')
+    return
 
 
   if 'preStart' in args:
+    # loop while we try to grab a lock on the seeds list
+    while not node.enough_seeds_exist(current_seeds) and not node.register_as_seed(current_seeds):
+      sleep(5)
+      current_seeds = node.query_seeds()
+      log('waiting for seeds lock, current seed list: {}'.format(str(current_seeds)))
+
+    # either enough seed nodes appeared in consul kv or we managed to add ourselves and grab the lock
+
     # render our template in case there are existing seeds
     log('rendering configuration during preStart')
 
     # attempting to render the config immediately can result in our own volunteering being omitted
     # TODO: figure out what consul-template config would work like this (and not block indefinitely)
-    from time import sleep
     sleep(1)
 
     node.render_config()
diff --git a/etc/containerpilot_handler/cassandra.py b/etc/containerpilot_handler/cassandra.py
index 5ed4f86..7459cb6 100644
--- a/etc/containerpilot_handler/cassandra.py
+++ b/etc/containerpilot_handler/cassandra.py
@@ -1,5 +1,5 @@
 from __future__ import print_function
-from os import listdir
+from os.path import exists
 from sys import stderr
 from socket import gethostname, gethostbyname
 from datetime import datetime
@@ -11,6 +11,8 @@ class Cassandra(object):
 
   FLAG_SNAPSHOT_REQUIRED = 'SNAPSHOT_REQUIRED'
 
+  FILE_SESSION_ID = '/tmp/consul.session'
+
   def __init__(self, consul, storage, home, user, password, datacenter, cluster_name):
     self.id = "cassandra-{}".format(gethostname())
     self.consul = consul
@@ -21,7 +23,8 @@ def __init__(self, consul, storage, home, user, password, datacenter, cluster_na
     self.datacenter = datacenter
     self.cluster_name = cluster_name
 
-    self.session_id = self.consul.session.create(self.id, behavior='delete', ttl=120)
+    self.session_id = self.load_or_create_session()
+    self.persist_session()
 
   def __str__(self):
     return 'Cassandra <id={}, consul={}, storage={}, user={}, datacenter={}, cluster_name={} session_id={}>'.format(
@@ -33,6 +36,22 @@ def build_seeds_key(self):
   def build_snapshot_key(self):
     return 'cassandra-snapshot-{}-{}'.format(self.datacenter, gethostname())
 
+  def load_or_create_session(self):  # returns a consul session id: cached on disk, else newly created
+    if exists(Cassandra.FILE_SESSION_ID):
+      log('found session file')
+      with open(Cassandra.FILE_SESSION_ID, 'r') as session_file:
+        return session_file.read()  # NOTE(review): returned as-is, not checked against consul here
+    # no cached id on disk: ask consul for a new session
+    log('creating new session')
+    return self.consul.session.create(self.id, behavior='delete', ttl=120)
+
+  def persist_session(self):  # write self.session_id to FILE_SESSION_ID, then renew it with consul
+    with open(Cassandra.FILE_SESSION_ID, 'w') as session_file:
+      session_file.write(self.session_id)
+
+    log('renewing persisted session: {}'.format(self.session_id))
+    self.consul.session.renew(self.session_id)
+
   def query_snapshot_state(self):
     _, snapshot = self.consul.kv.get(self.build_snapshot_key())
 
@@ -46,6 +65,9 @@ def query_seeds(self):
     if seeds is None:
       return None
 
+    if seeds['Value'] is None:
+      return []
+
     return seeds['Value'].split(',')
 
   def read_saved_seeds(self, should_retry=True):
@@ -79,7 +101,8 @@ def enough_seeds_exist(self, seeds):
       return False
 
     parsed_seeds = [s.strip() for s in seeds]
-    return 1 < len(parsed_seeds)
+    # ideally there should be two (three max) seeds per DC, but we'll start with just one
+    return 0 < len(parsed_seeds)
 
   def already_registered_as_seed(self, seeds):
     if seeds is None:
@@ -95,9 +118,7 @@ def register_as_seed(self, seeds):
 
     seeds.append(own_ip)
 
-    registered = self.consul.kv.put(self.build_seeds_key(), ','.join(seeds))
-    if not registered:
-      raise ValueError('Failed to register ourselves as a seed')
+    return self.consul.kv.put(self.build_seeds_key(), ','.join(seeds), acquire=self.session_id)
 
   def render_config(self):
     log('SO CLOSE')
diff --git a/makefile b/makefile
index 7b47f07..05eccb3 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,8 @@
 DC := docker-compose -p autopilotpattern -f examples/compose/local-compose.yml
 
+# First 0.0.0.0:<port> address that `docker-compose ps` lists for consuldc1.
+CONSUL_ADDR := $(shell $(DC) ps consuldc1 | grep -Eo '0\.0\.0\.0:[0-9]+' | head -1)
+CONSUL_URL := http://$(CONSUL_ADDR)
 .PHONY: *
 
 build:
@@ -9,10 +12,14 @@ up:
 	$(DC) up
 
 restart-cassandra:
-	$(DC) stop cassandra && $(DC) rm -vf cassandra && $(DC) build cassandra && $(DC) up cassandra
+	$(DC) stop cassandra
+	$(DC) rm -vf cassandra
+	$(DC) build cassandra
+	$(DC) up -d --scale=cassandra=2 --scale=consuldc1=3 cassandra consuldc1
+	$(DC) logs -f cassandra
 
 consul:
-	$(DC) up -d consul
+	open $(CONSUL_URL)
 
 down:
 	$(DC) down --remove-orphans -v

From f459b69a20abc9d9c3867aeee498bc0b80613d5a Mon Sep 17 00:00:00 2001
From: Tomas Celaya <tjcelaya@gmail.com>
Date: Fri, 8 Dec 2017 18:26:52 -0800
Subject: [PATCH 6/7] Configuration for smaller cassandra nodes

---
 Dockerfile                              |    9 +-
 etc/cassandra.tiny.yaml.ctmpl           | 1248 +++++++++++++++++++++++
 etc/containerpilot.json5                |    6 +-
 etc/containerpilot_handler.py           |    2 +-
 etc/containerpilot_handler/cassandra.py |    3 +-
 examples/compose/local-compose.yml      |    2 +-
 6 files changed, 1262 insertions(+), 8 deletions(-)
 create mode 100644 etc/cassandra.tiny.yaml.ctmpl

diff --git a/Dockerfile b/Dockerfile
index 65a1125..833ed80 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -57,7 +57,10 @@ COPY etc/containerpilot.json5 /etc/containerpilot.json5
 
 COPY etc/containerpilot_handler /usr/local/bin/containerpilot_handler
 COPY etc/containerpilot_handler.py /usr/local/bin/containerpilot_handler.py
-COPY etc/cassandra.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
+# COPY etc/cassandra.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
+
+# the following COPY should be used for minimal-memory installations (as low as 256m?)
+COPY etc/cassandra.tiny.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
 
 # disable the automatic seed configuration that enables single-node bootstrapping
 # the first line corresponds to "always set self as seed"
@@ -65,6 +68,10 @@ COPY etc/cassandra.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
 RUN sed -ri '/CASSANDRA_SEEDS.*CASSANDRA_BROADCAST_ADDRESS/d' /docker-entrypoint.sh && \
     sed -ri '/sed -ri.*CASSANDRA_SEEDS.*\/cassandra.yaml/d' /docker-entrypoint.sh
 
+# TODO: uncomment for tiny cassandra nodes (don't forget to change the COPY above to cassandra.tiny.yaml.ctmpl)
+RUN sed -ri 's/^#MAX_HEAP_SIZE.*/MAX_HEAP_SIZE="64M"/' /etc/cassandra/cassandra-env.sh && \
+    sed -ri 's/^#HEAP_NEWSIZE.*/HEAP_NEWSIZE="12M"/' /etc/cassandra/cassandra-env.sh
+
 
 EXPOSE 7000 7001 7199 9042 9160
 
diff --git a/etc/cassandra.tiny.yaml.ctmpl b/etc/cassandra.tiny.yaml.ctmpl
new file mode 100644
index 0000000..2f2ec61
--- /dev/null
+++ b/etc/cassandra.tiny.yaml.ctmpl
@@ -0,0 +1,1248 @@
+# Cassandra storage config YAML
+
+# NOTE:
+#   See http://wiki.apache.org/cassandra/StorageConfiguration for
+#   full explanations of configuration directives
+# /NOTE
+
+# The name of the cluster. This is mainly used to prevent machines in
+# one logical cluster from joining another.
+cluster_name: 'Test Cluster'
+
+# This defines the number of tokens randomly assigned to this node on the ring
+# The more tokens, relative to other nodes, the larger the proportion of data
+# that this node will store. You probably want all nodes to have the same number
+# of tokens assuming they have equal hardware capability.
+#
+# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility,
+# and will use the initial_token as described below.
+#
+# Specifying initial_token will override this setting on the node's initial start,
+# on subsequent starts, this setting will apply even if initial token is set.
+#
+# If you already have a cluster with 1 token per node, and wish to migrate to 
+# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
+num_tokens: 256
+
+# Triggers automatic allocation of num_tokens tokens for this node. The allocation
+# algorithm attempts to choose tokens in a way that optimizes replicated load over
+# the nodes in the datacenter for the replication strategy used by the specified
+# keyspace.
+#
+# The load assigned to each node will be close to proportional to its number of
+# vnodes.
+#
+# Only supported with the Murmur3Partitioner.
+# allocate_tokens_for_keyspace: KEYSPACE
+
+# initial_token allows you to specify tokens manually.  While you can use it with
+# vnodes (num_tokens > 1, above) -- in which case you should provide a 
+# comma-separated list -- it's primarily used when adding nodes to legacy clusters 
+# that do not have vnodes enabled.
+# initial_token:
+
+# See http://wiki.apache.org/cassandra/HintedHandoff
+# May either be "true" or "false" to enable globally
+hinted_handoff_enabled: true
+
+# When hinted_handoff_enabled is true, a black list of data centers that will not
+# perform hinted handoff
+# hinted_handoff_disabled_datacenters:
+#    - DC1
+#    - DC2
+
+# this defines the maximum amount of time a dead host will have hints
+# generated.  After it has been dead this long, new hints for it will not be
+# created until it has been seen alive and gone down again.
+max_hint_window_in_ms: 10800000 # 3 hours
+
+# Maximum throttle in KBs per second, per delivery thread.  This will be
+# reduced proportionally to the number of nodes in the cluster.  (If there
+# are two nodes in the cluster, each delivery thread will use the maximum
+# rate; if there are three, each will throttle to half of the maximum,
+# since we expect two nodes to be delivering hints simultaneously.)
+hinted_handoff_throttle_in_kb: 1024
+
+# Number of threads with which to deliver hints;
+# Consider increasing this number when you have multi-dc deployments, since
+# cross-dc handoff tends to be slower
+max_hints_delivery_threads: 2
+
+# Directory where Cassandra should store hints.
+# If not set, the default directory is $CASSANDRA_HOME/data/hints.
+# hints_directory: /var/lib/cassandra/hints
+
+# How often hints should be flushed from the internal buffers to disk.
+# Will *not* trigger fsync.
+hints_flush_period_in_ms: 10000
+
+# Maximum size for a single hints file, in megabytes.
+max_hints_file_size_in_mb: 128
+
+# Compression to apply to the hint files. If omitted, hints files
+# will be written uncompressed. LZ4, Snappy, and Deflate compressors
+# are supported.
+#hints_compression:
+#   - class_name: LZ4Compressor
+#     parameters:
+#         -
+
+# Maximum throttle in KBs per second, total. This will be
+# reduced proportionally to the number of nodes in the cluster.
+batchlog_replay_throttle_in_kb: 1024
+
+# Authentication backend, implementing IAuthenticator; used to identify users
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
+# PasswordAuthenticator}.
+#
+# - AllowAllAuthenticator performs no checks - set it to disable authentication.
+# - PasswordAuthenticator relies on username/password pairs to authenticate
+#   users. It keeps usernames and hashed passwords in system_auth.roles table.
+#   Please increase system_auth keyspace replication factor if you use this authenticator.
+#   If using PasswordAuthenticator, CassandraRoleManager must also be used (see below)
+authenticator: AllowAllAuthenticator
+
+# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
+# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer,
+# CassandraAuthorizer}.
+#
+# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
+# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please
+#   increase system_auth keyspace replication factor if you use this authorizer.
+authorizer: AllowAllAuthorizer
+
+# Part of the Authentication & Authorization backend, implementing IRoleManager; used
+# to maintain grants and memberships between roles.
+# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager,
+# which stores role information in the system_auth keyspace. Most functions of the
+# IRoleManager require an authenticated login, so unless the configured IAuthenticator
+# actually implements authentication, most of this functionality will be unavailable.
+#
+# - CassandraRoleManager stores role data in the system_auth keyspace. Please
+#   increase system_auth keyspace replication factor if you use this role manager.
+role_manager: CassandraRoleManager
+
+# Validity period for roles cache (fetching granted roles can be an expensive
+# operation depending on the role manager, CassandraRoleManager is one example)
+# Granted roles are cached for authenticated sessions in AuthenticatedUser and
+# after the period specified here, become eligible for (async) reload.
+# Defaults to 2000, set to 0 to disable caching entirely.
+# Will be disabled automatically for AllowAllAuthenticator.
+roles_validity_in_ms: 2000
+
+# Refresh interval for roles cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If roles_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as roles_validity_in_ms.
+# roles_update_interval_in_ms: 2000
+
+# Validity period for permissions cache (fetching permissions can be an
+# expensive operation depending on the authorizer, CassandraAuthorizer is
+# one example). Defaults to 2000, set to 0 to disable.
+# Will be disabled automatically for AllowAllAuthorizer.
+permissions_validity_in_ms: 2000
+
+# Refresh interval for permissions cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If permissions_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as permissions_validity_in_ms.
+# permissions_update_interval_in_ms: 2000
+
+# Validity period for credentials cache. This cache is tightly coupled to
+# the provided PasswordAuthenticator implementation of IAuthenticator. If
+# another IAuthenticator implementation is configured, this cache will not
+# be automatically used and so the following settings will have no effect.
+# Please note, credentials are cached in their encrypted form, so while
+# activating this cache may reduce the number of queries made to the
+# underlying table, it may not  bring a significant reduction in the
+# latency of individual authentication attempts.
+# Defaults to 2000, set to 0 to disable credentials caching.
+credentials_validity_in_ms: 2000
+
+# Refresh interval for credentials cache (if enabled).
+# After this interval, cache entries become eligible for refresh. Upon next
+# access, an async reload is scheduled and the old value returned until it
+# completes. If credentials_validity_in_ms is non-zero, then this must be
+# also.
+# Defaults to the same value as credentials_validity_in_ms.
+# credentials_update_interval_in_ms: 2000
+
+# The partitioner is responsible for distributing groups of rows (by
+# partition key) across nodes in the cluster.  You should leave this
+# alone for new clusters.  The partitioner can NOT be changed without
+# reloading all data, so when upgrading you should set this to the
+# same partitioner you were already using.
+#
+# Besides Murmur3Partitioner, partitioners included for backwards
+# compatibility include RandomPartitioner, ByteOrderedPartitioner, and
+# OrderPreservingPartitioner.
+#
+partitioner: org.apache.cassandra.dht.Murmur3Partitioner
+
+# Directories where Cassandra should store data on disk.  Cassandra
+# will spread data evenly across them, subject to the granularity of
+# the configured compaction strategy.
+# If not set, the default directory is $CASSANDRA_HOME/data/data.
+data_file_directories:
+    - /var/lib/cassandra/data
+
+# commit log.  when running on magnetic HDD, this should be a
+# separate spindle than the data directories.
+# If not set, the default directory is $CASSANDRA_HOME/data/commitlog.
+commitlog_directory: /var/lib/cassandra/commitlog
+
+# Enable / disable CDC functionality on a per-node basis. This modifies the logic used
+# for write path allocation rejection (standard: never reject. cdc: reject Mutation
+# containing a CDC-enabled table if at space limit in cdc_raw_directory).
+cdc_enabled: false
+
+# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the
+# segment contains mutations for a CDC-enabled table. This should be placed on a
+# separate spindle than the data directories. If not set, the default directory is
+# $CASSANDRA_HOME/data/cdc_raw.
+# cdc_raw_directory: /var/lib/cassandra/cdc_raw
+
+# Policy for data disk failures:
+#
+# die
+#   shut down gossip and client transports and kill the JVM for any fs errors or
+#   single-sstable errors, so the node can be replaced.
+#
+# stop_paranoid
+#   shut down gossip and client transports even for single-sstable errors,
+#   kill the JVM for errors during startup.
+#
+# stop
+#   shut down gossip and client transports, leaving the node effectively dead, but
+#   can still be inspected via JMX, kill the JVM for errors during startup.
+#
+# best_effort
+#    stop using the failed disk and respond to requests based on
+#    remaining available sstables.  This means you WILL see obsolete
+#    data at CL.ONE!
+#
+# ignore
+#    ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
+disk_failure_policy: stop
+
+# Policy for commit disk failures:
+#
+# die
+#   shut down gossip and Thrift and kill the JVM, so the node can be replaced.
+#
+# stop
+#   shut down gossip and Thrift, leaving the node effectively dead, but
+#   can still be inspected via JMX.
+#
+# stop_commit
+#   shutdown the commit log, letting writes collect but
+#   continuing to service reads, as in pre-2.0.5 Cassandra
+#
+# ignore
+#   ignore fatal errors and let the batches fail
+commit_failure_policy: stop
+
+# Maximum size of the native protocol prepared statement cache
+#
+# Valid values are either "auto" (omitting the value) or a value greater 0.
+#
+# Note that specifying a too large value will result in long running GCs and possibly
+# out-of-memory errors. Keep the value at a small fraction of the heap.
+#
+# If you constantly see "prepared statements discarded in the last minute because
+# cache limit reached" messages, the first step is to investigate the root cause
+# of these messages and check whether prepared statements are used correctly -
+# i.e. use bind markers for variable parts.
+#
+# Do only change the default value, if you really have more prepared statements than
+# fit in the cache. In most cases it is not necessary to change this value.
+# Constantly re-preparing statements is a performance penalty.
+#
+# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
+prepared_statements_cache_size_mb:
+
+# Maximum size of the Thrift prepared statement cache
+#
+# If you do not use Thrift at all, it is safe to leave this value at "auto".
+#
+# See description of 'prepared_statements_cache_size_mb' above for more information.
+#
+# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
+thrift_prepared_statements_cache_size_mb:
+
+# Maximum size of the key cache in memory.
+#
+# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
+# minimum, sometimes more. The key cache is fairly tiny for the amount of
+# time it saves, so it's worthwhile to use it at large numbers.
+# The row cache saves even more time, but must contain the entire row,
+# so it is extremely space-intensive. It's best to only use the
+# row cache if you have hot rows or static rows.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
+key_cache_size_in_mb: 0
+
+# Duration in seconds after which Cassandra should
+# save the key cache. Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and is relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 14400 or 4 hours.
+key_cache_save_period: 14400
+
+# Number of keys from the key cache to save
+# Disabled by default, meaning all keys are going to be saved
+# key_cache_keys_to_save: 100
+
+# Row cache implementation class name. Available implementations:
+#
+# org.apache.cassandra.cache.OHCProvider
+#   Fully off-heap row cache implementation (default).
+#
+# org.apache.cassandra.cache.SerializingCacheProvider
+#   This is the row cache implementation available
+#   in previous releases of Cassandra.
+# row_cache_class_name: org.apache.cassandra.cache.OHCProvider
+
+# Maximum size of the row cache in memory.
+# Please note that OHC cache implementation requires some additional off-heap memory to manage
+# the map structures and some in-flight memory during operations before/after cache entries can be
+# accounted against the cache capacity. This overhead is usually small compared to the whole capacity.
+# Do not specify more memory than the system can afford in the worst usual situation and leave some
+# headroom for OS block level cache. Never allow your system to swap.
+#
+# Default value is 0, to disable row caching.
+row_cache_size_in_mb: 0
+
+# Duration in seconds after which Cassandra should save the row cache.
+# Caches are saved to saved_caches_directory as specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and are relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 0 to disable saving the row cache.
+row_cache_save_period: 0
+
+# Number of keys from the row cache to save.
+# Specify 0 (which is the default), meaning all keys are going to be saved
+# row_cache_keys_to_save: 100
+
+# Maximum size of the counter cache in memory.
+#
+# Counter cache helps to reduce counter locks' contention for hot counter cells.
+# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before
+# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration
+# of the lock hold, helping with hot counter cell updates, but will not allow skipping
+# the read entirely. Only the local (clock, count) tuple of a counter cell is kept
+# in memory, not the whole counter, so it's relatively cheap.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.
+# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache.
+counter_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the counter cache (keys only). Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Default is 7200 or 2 hours.
+counter_cache_save_period: 7200
+
+# Number of keys from the counter cache to save
+# Disabled by default, meaning all keys are going to be saved
+# counter_cache_keys_to_save: 100
+
+# saved caches
+# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
+saved_caches_directory: /var/lib/cassandra/saved_caches
+
+# commitlog_sync may be either "periodic" or "batch." 
+# 
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been fsynced to disk.  It will wait
+# commitlog_sync_batch_window_in_ms milliseconds between fsyncs.
+# This window should be kept short because the writer threads will
+# be unable to do extra work while waiting.  (You may need to increase
+# concurrent_writes for the same reason.)
+#
+# commitlog_sync: batch
+# commitlog_sync_batch_window_in_ms: 2
+#
+# the other option is "periodic" where writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds. 
+commitlog_sync: periodic
+commitlog_sync_period_in_ms: 10000
+
+# The size of the individual commitlog file segments.  A commitlog
+# segment may be archived, deleted, or recycled once all the data
+# in it (potentially from each columnfamily in the system) has been
+# flushed to sstables.
+#
+# The default size is 32, which is almost always fine, but if you are
+# archiving commitlog segments (see commitlog_archiving.properties),
+# then you probably want a finer granularity of archiving; 8 or 16 MB
+# is reasonable.
+# Max mutation size is also configurable via max_mutation_size_in_kb setting in
+# cassandra.yaml. The default is half of commitlog_segment_size_in_mb * 1024.
+#
+# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must
+# be set to at least twice the size of max_mutation_size_in_kb / 1024
+#
+commitlog_segment_size_in_mb: 32
+
+# Compression to apply to the commit log. If omitted, the commit log
+# will be written uncompressed.  LZ4, Snappy, and Deflate compressors
+# are supported.
+# commitlog_compression:
+#   - class_name: LZ4Compressor
+#     parameters:
+#         -
+
+# any class that implements the SeedProvider interface and has a
+# constructor that takes a Map<String, String> of parameters will do.
+seed_provider:
+    # Addresses of hosts that are deemed contact points. 
+    # Cassandra nodes use this list of hosts to find each other and learn
+    # the topology of the ring.  You must change this if you are running
+    # multiple nodes!
+    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
+      parameters:
+          # seeds is actually a comma-delimited list of addresses.
+          # Ex: "<ip1>,<ip2>,<ip3>"
+          {{ $clusterName := env "CASSANDRA_CLUSTER_NAME" }}
+          - seeds: {{ range datacenters }}{{ $seedKey := printf "cassandra-seeds-%s-%s@%s" $clusterName . . }}{{ if keyExists $seedKey }}{{ key $seedKey }},{{ end }}{{ end }}
+
+# {{ range datacenters }}{{ $seedKey := printf "cassandra-seeds-%s-%s@%s" $clusterName . . }}{{ if keyExists $seedKey }}{{ key $seedKey }},{{ end }}{{ end }}
+
+# For workloads with more data than can fit in memory, Cassandra's
+# bottleneck will be reads that need to fetch data from
+# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
+# order to allow the operations to enqueue low enough in the stack
+# that the OS and drives can reorder them. Same applies to
+# "concurrent_counter_writes", since counter writes read the current
+# values before incrementing and writing them back.
+#
+# On the other hand, since writes are almost never IO bound, the ideal
+# number of "concurrent_writes" is dependent on the number of cores in
+# your system; (8 * number_of_cores) is a good rule of thumb.
+
+# concurrent_reads: 32
+# concurrent_writes: 32
+# concurrent_counter_writes: 32
+concurrent_reads: 2
+concurrent_writes: 2
+concurrent_counter_writes: 2
+
+# Materialized view writes involve a read, so this should
+# be limited by the lesser of concurrent reads or concurrent writes.
+
+# concurrent_materialized_view_writes: 32
+concurrent_materialized_view_writes: 2
+
+# Maximum memory to use for sstable chunk cache and buffer pooling.
+# 32MB of this are reserved for pooling buffers, the rest is used as a
+# cache that holds uncompressed sstable chunks.
+# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap,
+# so is in addition to the memory allocated for heap. The cache also has on-heap
+# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size
+# if the default 64k chunk size is used).
+# Memory is only allocated when needed.
+# file_cache_size_in_mb: 512
+
+# Flag indicating whether to allocate on or off heap when the sstable buffer
+# pool is exhausted, that is when it has exceeded the maximum memory
+# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request.
+
+# buffer_pool_use_heap_if_exhausted: true
+
+# The strategy for optimizing disk read
+# Possible values are:
+# ssd (for solid state disks, the default)
+# spinning (for spinning disks)
+# disk_optimization_strategy: ssd
+
+# Total permitted memory to use for memtables. Cassandra will stop
+# accepting writes when the limit is exceeded until a flush completes,
+# and will trigger a flush based on memtable_cleanup_threshold
+# If omitted, Cassandra will set both to 1/4 the size of the heap.
+memtable_heap_space_in_mb: 128
+memtable_offheap_space_in_mb: 128
+
+# memtable_cleanup_threshold is deprecated. The default calculation
+# is the only reasonable choice. See the comments on  memtable_flush_writers
+# for more information.
+#
+# Ratio of occupied non-flushing memtable size to total permitted size
+# that will trigger a flush of the largest memtable. Larger mct will
+# mean larger flushes and hence less compaction, but also less concurrent
+# flush activity which can make it difficult to keep your disks fed
+# under heavy write load.
+#
+# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
+# memtable_cleanup_threshold: 0.11
+
+# Specify the way Cassandra allocates and manages memtable memory.
+# Options are:
+#
+# heap_buffers
+#   on heap nio buffers
+#
+# offheap_buffers
+#   off heap (direct) nio buffers
+#
+# offheap_objects
+#    off heap objects
+memtable_allocation_type: heap_buffers
+
+# Total space to use for commit logs on disk.
+#
+# If space gets above this value, Cassandra will flush every dirty CF
+# in the oldest segment and remove it.  So a small total commitlog space
+# will tend to cause more flush activity on less-active columnfamilies.
+#
+# The default value is the smaller of 8192, and 1/4 of the total space
+# of the commitlog volume.
+#
+# commitlog_total_space_in_mb: 8192
+
+# This sets the number of memtable flush writer threads per disk
+# as well as the total number of memtables that can be flushed concurrently.
+# These are generally a combination of compute and IO bound.
+#
+# Memtable flushing is more CPU efficient than memtable ingest and a single thread
+# can keep up with the ingest rate of a whole server on a single fast disk
+# until it temporarily becomes IO bound under contention typically with compaction.
+# At that point you need multiple flush threads. At some point in the future
+# it may become CPU bound all the time.
+#
+# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation
+# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing
+# to free memory.
+#
+# memtable_flush_writers defaults to two for a single data directory.
+# This means that two  memtables can be flushed concurrently to the single data directory.
+# If you have multiple data directories the default is one memtable flushing at a time
+# but the flush will use a thread per data directory so you will get two or more writers.
+#
+# Two is generally enough to flush on a fast disk [array] mounted as a single data directory.
+# Adding more flush writers will result in smaller more frequent flushes that introduce more
+# compaction overhead.
+#
+# There is a direct tradeoff between number of memtables that can be flushed concurrently
+# and flush size and frequency. More is not better; you just need enough flush writers
+# to never stall waiting for flushing to free memory.
+#
+#memtable_flush_writers: 2
+
+# Total space to use for change-data-capture logs on disk.
+#
+# If space gets above this value, Cassandra will throw WriteTimeoutException
+# on Mutations including tables with CDC enabled. A CDCCompactor is responsible
+# for parsing the raw CDC logs and deleting them when parsing is completed.
+#
+# The default value is the min of 4096 mb and 1/8th of the total space
+# of the drive where cdc_raw_directory resides.
+# cdc_total_space_in_mb: 4096
+
+# When we hit our cdc_raw limit and the CDCCompactor is either running behind
+# or experiencing backpressure, we check at the following interval to see if any
+# new space for cdc-tracked tables has been made available. Default to 250ms
+# cdc_free_space_check_interval_ms: 250
+
+# A fixed memory pool size in MB for SSTable index summaries. If left
+# empty, this will default to 5% of the heap size. If the memory usage of
+# all index summaries exceeds this limit, SSTables with low read rates will
+# shrink their index summaries in order to meet this limit.  However, this
+# is a best-effort process. In extreme conditions Cassandra may need to use
+# more than this amount of memory.
+index_summary_capacity_in_mb:
+
+# How frequently index summaries should be resampled.  This is done
+# periodically to redistribute memory from the fixed-size pool to sstables
+# proportional to their recent read rates.  Setting to -1 will disable this
+# process, leaving existing index summaries at their current sampling level.
+index_summary_resize_interval_in_minutes: 60
+
+# Whether to, when doing sequential writing, fsync() at intervals in
+# order to force the operating system to flush the dirty
+# buffers. Enable this to avoid sudden dirty buffer flushing from
+# impacting read latencies. Almost always a good idea on SSDs; not
+# necessarily on platters.
+trickle_fsync: false
+trickle_fsync_interval_in_kb: 10240
+
+# TCP port, for commands and data
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+storage_port: 7000
+
+# SSL port, for encrypted communication.  Unused unless enabled in
+# encryption_options
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+ssl_storage_port: 7001
+
+# Address or interface to bind to and tell other Cassandra nodes to connect to.
+# You _must_ change this if you want multiple nodes to be able to communicate!
+#
+# Set listen_address OR listen_interface, not both.
+#
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing _if_ the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be).
+#
+# Setting listen_address to 0.0.0.0 is always wrong.
+#
+listen_address: 172.20.0.3
+
+# Set listen_address OR listen_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+# listen_interface: eth0
+
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+# listen_interface_prefer_ipv6: false
+
+# Address to broadcast to other Cassandra nodes
+# Leaving this blank will set it to the same value as listen_address
+broadcast_address: 172.20.0.3
+
+# When using multiple physical network interfaces, set this
+# to true to listen on broadcast_address in addition to
+# the listen_address, allowing nodes to communicate in both
+# interfaces.
+# Ignore this property if the network configuration automatically
+# routes  between the public and private networks such as EC2.
+# listen_on_broadcast_address: false
+
+# Internode authentication backend, implementing IInternodeAuthenticator;
+# used to allow/disallow connections from peer nodes.
+# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
+
+# Whether to start the native transport server.
+# Please note that the address on which the native transport is bound is the
+# same as the rpc_address. The port however is different and specified below.
+start_native_transport: true
+# port for the CQL native transport to listen for clients on
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+native_transport_port: 9042
+# Enabling native transport encryption in client_encryption_options allows you to either use
+# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
+# standard native_transport_port.
+# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
+# for native_transport_port. Setting native_transport_port_ssl to a different value
+# from native_transport_port will use encryption for native_transport_port_ssl while
+# keeping native_transport_port unencrypted.
+# native_transport_port_ssl: 9142
+# The maximum threads for handling requests when the native transport is used.
+# This is similar to rpc_max_threads though the default differs slightly (and
+# there is no native_transport_min_threads, idle threads will always be stopped
+# after 30 seconds).
+# native_transport_max_threads: 128
+#
+# The maximum size of allowed frame. Frame (requests) larger than this will
+# be rejected as invalid. The default is 256MB. If you're changing this parameter,
+# you may want to adjust max_value_size_in_mb accordingly.
+# native_transport_max_frame_size_in_mb: 256
+
+# The maximum number of concurrent client connections.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections: -1
+
+# The maximum number of concurrent client connections per source ip.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections_per_ip: -1
+
+# Whether to start the thrift rpc server.
+start_rpc: false
+
+# The address or interface to bind the Thrift RPC service and native transport
+# server to.
+#
+# Set rpc_address OR rpc_interface, not both.
+#
+# Leaving rpc_address blank has the same effect as on listen_address
+# (i.e. it will be based on the configured hostname of the node).
+#
+# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
+# set broadcast_rpc_address to a value other than 0.0.0.0.
+#
+# For security reasons, you should not expose this port to the internet.  Firewall it if needed.
+rpc_address: 0.0.0.0
+
+# Set rpc_address OR rpc_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+# rpc_interface: eth1
+
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+# rpc_interface_prefer_ipv6: false
+
+# port for Thrift to listen for clients on
+rpc_port: 9160
+
+# RPC address to broadcast to drivers and other Cassandra nodes. This cannot
+# be set to 0.0.0.0. If left blank, this will be set to the value of
+# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must
+# be set.
+broadcast_rpc_address: 172.20.0.3
+
+# enable or disable keepalive on rpc/native connections
+rpc_keepalive: true
+
+# Cassandra provides two out-of-the-box options for the RPC Server:
+#
+# sync
+#   One thread per thrift connection. For a very large number of clients, memory
+#   will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size
+#   per thread, and that will correspond to your use of virtual memory (but physical memory
+#   may be limited depending on use of stack space).
+#
+# hsha
+#   Stands for "half synchronous, half asynchronous." All thrift clients are handled
+#   asynchronously using a small number of threads that does not vary with the amount
+#   of thrift clients (and thus scales well to many clients). The rpc requests are still
+#   synchronous (one thread per active request). If hsha is selected then it is essential
+#   that rpc_max_threads is changed from the default value of unlimited.
+#
+# The default is sync because on Windows hsha is about 30% slower.  On Linux,
+# sync/hsha performance is about the same, with hsha of course using less memory.
+#
+# Alternatively, you can provide your own RPC server by providing the fully-qualified class name
+# of an o.a.c.t.TServerFactory that can create an instance of it.
+
+# rpc_server_type: sync
+rpc_server_type: hsha
+
+# Uncomment rpc_min|max_thread to set request pool size limits.
+#
+# Regardless of your choice of RPC server (see above), the number of maximum requests in the
+# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync
+# RPC server, it also dictates the number of clients that can be connected at all).
+#
+# The default is unlimited and thus provides no protection against clients overwhelming the server. You are
+# encouraged to set a maximum that makes sense for you in production, but do keep in mind that
+# rpc_max_threads represents the maximum number of client requests this server may execute concurrently.
+#
+# rpc_min_threads: 16
+# rpc_max_threads: 2048
+
+rpc_min_threads: 1
+rpc_max_threads: 2
+
+# uncomment to set socket buffer sizes on rpc connections
+# rpc_send_buff_size_in_bytes:
+# rpc_recv_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.wmem_max
+# and when not setting it it is defined by net.ipv4.tcp_wmem
+# See also:
+# /proc/sys/net/core/wmem_max
+# /proc/sys/net/core/rmem_max
+# /proc/sys/net/ipv4/tcp_wmem
+# /proc/sys/net/ipv4/tcp_rmem
+# and 'man tcp'
+# internode_send_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.rmem_max
+# and when not setting it it is defined by net.ipv4.tcp_rmem
+# internode_recv_buff_size_in_bytes:
+
+# Frame size for thrift (maximum message length).
+thrift_framed_transport_size_in_mb: 15
+
+# Set to true to have Cassandra create a hard link to each sstable
+# flushed or streamed locally in a backups/ subdirectory of the
+# keyspace data.  Removing these links is the operator's
+# responsibility.
+incremental_backups: false
+
+# Whether or not to take a snapshot before each compaction.  Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you.  Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+
+# Whether or not a snapshot is taken of the data before keyspace truncation
+# or dropping of column families. The STRONGLY advised default of true 
+# should be used to provide data safety. If you set this flag to false, you will
+# lose data on truncation or drop.
+auto_snapshot: true
+
+# Granularity of the collation index of rows within a partition.
+# Increase if your rows are large, or if you have a very large
+# number of rows per partition.  The competing goals are these:
+#
+# - a smaller granularity means more index entries are generated
+#   and looking up rows within the partition by collation column
+#   is faster
+# - but, Cassandra will keep the collation index in memory for hot
+#   rows (as part of the key cache), so a larger granularity means
+#   you can cache more hot rows
+column_index_size_in_kb: 64
+
+# Per sstable indexed key cache entries (the collation index in memory
+# mentioned above) exceeding this size will not be held on heap.
+# This means that only partition information is held on heap and the
+# index entries are read from disk.
+#
+# Note that this size refers to the size of the
+# serialized index information and not the size of the partition.
+column_index_cache_size_in_kb: 2
+
+# Number of simultaneous compactions to allow, NOT including
+# validation "compactions" for anti-entropy repair.  Simultaneous
+# compactions can help preserve read performance in a mixed read/write
+# workload, by mitigating the tendency of small sstables to accumulate
+# during a single long running compaction. The default is usually
+# fine and if you experience problems with compaction running too
+# slowly or too fast, you should look at
+# compaction_throughput_mb_per_sec first.
+#
+# concurrent_compactors defaults to the smaller of (number of disks,
+# number of cores), with a minimum of 2 and a maximum of 8.
+# 
+# If your data directories are backed by SSD, you should increase this
+# to the number of cores.
+concurrent_compactors: 1
+
+# Throttles compaction to the given total throughput across the entire
+# system. The faster you insert data, the faster you need to compact in
+# order to keep the sstable count down, but in general, setting this to
+# 16 to 32 times the rate you are inserting data is more than sufficient.
+# Setting this to 0 disables throttling. Note that this accounts for all types
+# of compaction, including validation compaction.
+
+# compaction_throughput_mb_per_sec: 16
+compaction_throughput_mb_per_sec: 0
+
+# When compacting, the replacement sstable(s) can be opened before they
+# are completely written, and used in place of the prior sstables for
+# any range that has been written. This helps to smoothly transfer reads 
+# between the sstables, reducing page cache churn and keeping hot rows hot
+sstable_preemptive_open_interval_in_mb: 50
+
+# Throttles all outbound streaming file transfers on this node to the
+# given total throughput in Mbps. This is necessary because Cassandra does
+# mostly sequential IO when streaming data during bootstrap or repair, which
+# can lead to saturating the network connection and degrading rpc performance.
+# When unset, the default is 200 Mbps or 25 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 200
+
+# Throttles all streaming file transfer between the datacenters,
+# this setting allows users to throttle inter dc stream throughput in addition
+# to throttling all network stream traffic as configured with
+# stream_throughput_outbound_megabits_per_sec
+# When unset, the default is 200 Mbps or 25 MB/s
+# inter_dc_stream_throughput_outbound_megabits_per_sec: 200
+
+# How long the coordinator should wait for read operations to complete
+read_request_timeout_in_ms: 5000
+# How long the coordinator should wait for seq or index scans to complete
+range_request_timeout_in_ms: 10000
+# How long the coordinator should wait for writes to complete
+write_request_timeout_in_ms: 2000
+# How long the coordinator should wait for counter writes to complete
+counter_write_request_timeout_in_ms: 5000
+# How long a coordinator should continue to retry a CAS operation
+# that contends with other proposals for the same row
+cas_contention_timeout_in_ms: 1000
+# How long the coordinator should wait for truncates to complete
+# (This can be much longer, because unless auto_snapshot is disabled
+# we need to flush first so we can snapshot before removing the data.)
+truncate_request_timeout_in_ms: 60000
+# The default timeout for other, miscellaneous operations
+request_timeout_in_ms: 10000
+
+# How long before a node logs slow queries. Select queries that take longer than
+# this timeout to execute, will generate an aggregated log message, so that slow queries
+# can be identified. Set this value to zero to disable slow query logging.
+slow_query_log_timeout_in_ms: 500
+
+# Enable operation timeout information exchange between nodes to accurately
+# measure request timeouts.  If disabled, replicas will assume that requests
+# were forwarded to them instantly by the coordinator, which means that
+# under overload conditions we will waste that much extra time processing 
+# already-timed-out requests.
+#
+# Warning: before enabling this property make sure ntp is installed
+# and the times are synchronized between the nodes.
+cross_node_timeout: false
+
+# Set keep-alive period for streaming
+# This node will send a keep-alive message periodically with this period.
+# If the node does not receive a keep-alive message from the peer for
+# 2 keep-alive cycles the stream session times out and fails
+# Default value is 300s (5 minutes), which means stalled stream
+# times out in 10 minutes by default
+# streaming_keep_alive_period_in_secs: 300
+
+# phi value that must be reached for a host to be marked down.
+# most users should never need to adjust this.
+# phi_convict_threshold: 8
+
+# endpoint_snitch -- Set this to a class that implements
+# IEndpointSnitch.  The snitch has two functions:
+#
+# - it teaches Cassandra enough about your network topology to route
+#   requests efficiently
+# - it allows Cassandra to spread replicas around your cluster to avoid
+#   correlated failures. It does this by grouping machines into
+#   "datacenters" and "racks."  Cassandra will do its best not to have
+#   more than one replica on the same "rack" (which may not actually
+#   be a physical location)
+#
+# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH
+# ONCE DATA IS INSERTED INTO THE CLUSTER.  This would cause data loss.
+# This means that if you start with the default SimpleSnitch, which
+# locates every node on "rack1" in "datacenter1", your only options
+# if you need to add another datacenter are GossipingPropertyFileSnitch
+# (and the older PFS).  From there, if you want to migrate to an
+# incompatible snitch like Ec2Snitch you can do it by adding new nodes
+# under Ec2Snitch (which will locate them in a new "datacenter") and
+# decommissioning the old ones.
+#
+# Out of the box, Cassandra provides:
+#
+# SimpleSnitch:
+#    Treats Strategy order as proximity. This can improve cache
+#    locality when disabling read repair.  Only appropriate for
+#    single-datacenter deployments.
+#
+# GossipingPropertyFileSnitch
+#    This should be your go-to snitch for production use.  The rack
+#    and datacenter for the local node are defined in
+#    cassandra-rackdc.properties and propagated to other nodes via
+#    gossip.  If cassandra-topology.properties exists, it is used as a
+#    fallback, allowing migration from the PropertyFileSnitch.
+#
+# PropertyFileSnitch:
+#    Proximity is determined by rack and data center, which are
+#    explicitly configured in cassandra-topology.properties.
+#
+# Ec2Snitch:
+#    Appropriate for EC2 deployments in a single Region. Loads Region
+#    and Availability Zone information from the EC2 API. The Region is
+#    treated as the datacenter, and the Availability Zone as the rack.
+#    Only private IPs are used, so this will not work across multiple
+#    Regions.
+#
+# Ec2MultiRegionSnitch:
+#    Uses public IPs as broadcast_address to allow cross-region
+#    connectivity.  (Thus, you should set seed addresses to the public
+#    IP as well.) You will need to open the storage_port or
+#    ssl_storage_port on the public IP firewall.  (For intra-Region
+#    traffic, Cassandra will switch to the private IP after
+#    establishing a connection.)
+#
+# RackInferringSnitch:
+#    Proximity is determined by rack and data center, which are
+#    assumed to correspond to the 3rd and 2nd octet of each node's IP
+#    address, respectively.  Unless this happens to match your
+#    deployment conventions, this is best used as an example of
+#    writing a custom Snitch class and is provided in that spirit.
+#
+# You can use a custom Snitch by setting this to the full class name
+# of the snitch, which will be assumed to be on your classpath.
+endpoint_snitch: SimpleSnitch
+
+# controls how often to perform the more expensive part of host score
+# calculation
+dynamic_snitch_update_interval_in_ms: 100 
+# controls how often to reset all host scores, allowing a bad host to
+# possibly recover
+dynamic_snitch_reset_interval_in_ms: 600000
+# if set greater than zero and read_repair_chance is < 1.0, this will allow
+# 'pinning' of replicas to hosts in order to increase cache capacity.
+# The badness threshold will control how much worse the pinned host has to be
+# before the dynamic snitch will prefer other replicas over it.  This is
+# expressed as a double which represents a percentage.  Thus, a value of
+# 0.2 means Cassandra would continue to prefer the static snitch values
+# until the pinned host was 20% worse than the fastest.
+dynamic_snitch_badness_threshold: 0.1
+
+# request_scheduler -- Set this to a class that implements
+# RequestScheduler, which will schedule incoming client requests
+# according to the specific policy. This is useful for multi-tenancy
+# with a single Cassandra cluster.
+# NOTE: This is specifically for requests from the client and does
+# not affect inter node communication.
+# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
+# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
+# client requests to a node with a separate queue for each
+# request_scheduler_id. The scheduler is further customized by
+# request_scheduler_options as described below.
+request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+
+# Scheduler Options vary based on the type of scheduler
+#
+# NoScheduler
+#   Has no options
+#
+# RoundRobin
+#   throttle_limit
+#     The throttle_limit is the number of in-flight
+#     requests per client.  Requests beyond 
+#     that limit are queued up until
+#     running requests can complete.
+#     The value of 80 here is twice the number of
+#     concurrent_reads + concurrent_writes.
+#   default_weight
+#     default_weight is optional and allows for
+#     overriding the default which is 1.
+#   weights
+#     Weights are optional and will default to 1 or the
+#     overridden default_weight. The weight translates into how
+#     many requests are handled during each turn of the
+#     RoundRobin, based on the scheduler id.
+#
+# request_scheduler_options:
+#    throttle_limit: 80
+#    default_weight: 5
+#    weights:
+#      Keyspace1: 1
+#      Keyspace2: 5
+
+# request_scheduler_id -- An identifier based on which to perform
+# the request scheduling. Currently the only valid option is keyspace.
+# request_scheduler_id: keyspace
+
+# Enable or disable inter-node encryption
+# JVM defaults for supported SSL socket protocols and cipher suites can
+# be replaced using custom encryption options. This is not recommended
+# unless you have policies in place that dictate certain settings, or
+# need to disable vulnerable ciphers or protocols in case the JVM cannot
+# be updated.
+# FIPS compliant settings can be configured at JVM level and should not
+# involve changing encryption settings here:
+# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html
+# *NOTE* No custom encryption options are enabled at the moment
+# The available internode options are : all, none, dc, rack
+#
+# If set to dc cassandra will encrypt the traffic between the DCs
+# If set to rack cassandra will encrypt the traffic between the racks
+#
+# The passwords used in these options must match the passwords used when generating
+# the keystore and truststore.  For instructions on generating these files, see:
+# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+#
+server_encryption_options:
+    internode_encryption: none
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    truststore: conf/.truststore
+    truststore_password: cassandra
+    # More advanced defaults below:
+    # protocol: TLS
+    # algorithm: SunX509
+    # store_type: JKS
+    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+    # require_client_auth: false
+    # require_endpoint_verification: false
+
+# enable or disable client/server encryption.
+client_encryption_options:
+    enabled: false
+    # If enabled and optional is set to true encrypted and unencrypted connections are handled.
+    optional: false
+    keystore: conf/.keystore
+    keystore_password: cassandra
+    # require_client_auth: false
+    # Set truststore and truststore_password if require_client_auth is true
+    # truststore: conf/.truststore
+    # truststore_password: cassandra
+    # More advanced defaults below:
+    # protocol: TLS
+    # algorithm: SunX509
+    # store_type: JKS
+    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
+
+# internode_compression controls whether traffic between nodes is
+# compressed.
+# Can be:
+#
+# all
+#   all traffic is compressed
+#
+# dc
+#   traffic between different datacenters is compressed
+#
+# none
+#   nothing is compressed.
+internode_compression: dc
+
+# Enable or disable tcp_nodelay for inter-dc communication.
+# Disabling it will result in larger (but fewer) network packets being sent,
+# reducing overhead from the TCP protocol itself, at the cost of increasing
+# latency if you block for cross-datacenter responses.
+inter_dc_tcp_nodelay: false
+
+# TTL for different trace types used during logging of the repair process.
+tracetype_query_ttl: 86400
+tracetype_repair_ttl: 604800
+
+# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
+# This threshold can be adjusted to minimize logging if necessary
+# gc_log_threshold_in_ms: 200
+
+# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at
+# INFO level
+# UDFs (user defined functions) are disabled by default.
+# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code.
+enable_user_defined_functions: false
+
+# Enables scripted UDFs (JavaScript UDFs).
+# Java UDFs are always enabled, if enable_user_defined_functions is true.
+# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider.
+# This option has no effect, if enable_user_defined_functions is false.
+enable_scripted_user_defined_functions: false
+
+# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation.
+# Lowering this value on Windows can provide much tighter latency and better throughput, however
+# some virtualized environments may see a negative performance impact from changing this setting
+# below their system default. The sysinternals 'clockres' tool can confirm your system's default
+# setting.
+windows_timer_interval: 1
+
+
+# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from
+# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by
+# the "key_alias" is the only key that will be used for encrypt operations; previously used keys
+# can still (and should!) be in the keystore and will be used on decrypt operations
+# (to handle the case of key rotation).
+#
+# It is strongly recommended to download and install Java Cryptography Extension (JCE)
+# Unlimited Strength Jurisdiction Policy Files for your version of the JDK.
+# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html)
+#
+# Currently, only the following file types are supported for transparent data encryption, although
+# more are coming in future cassandra releases: commitlog, hints
+transparent_data_encryption_options:
+    enabled: false
+    chunk_length_kb: 64
+    cipher: AES/CBC/PKCS5Padding
+    key_alias: testing:1
+    # CBC IV length for AES needs to be 16 bytes (which is also the default size)
+    # iv_length: 16
+    key_provider: 
+      - class_name: org.apache.cassandra.security.JKSKeyProvider
+        parameters: 
+          - keystore: conf/.keystore
+            keystore_password: cassandra
+            store_type: JCEKS
+            key_password: cassandra
+
+
+#####################
+# SAFETY THRESHOLDS #
+#####################
+
+# When executing a scan, within or across a partition, we need to keep the
+# tombstones seen in memory so we can return them to the coordinator, which
+# will use them to make sure other replicas also know about the deleted rows.
+# With workloads that generate a lot of tombstones, this can cause performance
+# problems and even exhaust the server heap.
+# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+# Adjust the thresholds here if you understand the dangers and want to
+# scan more tombstones anyway.  These thresholds may also be adjusted at runtime
+# using the StorageService mbean.
+tombstone_warn_threshold: 1000
+tombstone_failure_threshold: 100000
+
+# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default.
+# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
+batch_size_warn_threshold_in_kb: 5
+
+# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default.
+batch_size_fail_threshold_in_kb: 50
+
+# Log WARN on any batches not of type LOGGED that span across more partitions than this limit
+unlogged_batch_across_partitions_warn_threshold: 10
+
+# Log a warning when compacting partitions larger than this value
+compaction_large_partition_warning_threshold_mb: 100
+
+# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level
+# Adjust the threshold based on your application throughput requirement
+# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
+gc_warn_threshold_in_ms: 1000
+
+# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption
+# early. Any value size larger than this threshold will result into marking an SSTable
+# as corrupted.
+# max_value_size_in_mb: 256
+
+# Back-pressure settings #
+# If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation
+# sent to replicas, with the aim of reducing pressure on overloaded replicas.
+back_pressure_enabled: false
+# The back-pressure strategy applied.
+# The default implementation, RateBasedBackPressure, takes three arguments:
+# high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests.
+# If below high ratio, outgoing mutations are rate limited according to the incoming rate decreased by the given factor;
+# if above high ratio, the rate limiting is increased by the given factor;
+# such factor is usually best configured between 1 and 10, use larger values for a faster recovery
+# at the expense of potentially more dropped mutations;
+# the rate limiting is applied according to the flow type: if FAST, it's rate limited at the speed of the fastest replica,
+# if SLOW at the speed of the slowest one.
+# New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and
+# provide a public constructor accepting a Map<String, Object>.
+back_pressure_strategy:
+    - class_name: org.apache.cassandra.net.RateBasedBackPressure
+      parameters:
+        - high_ratio: 0.90
+          factor: 5
+          flow: FAST
+
+# Coalescing Strategies #
+# Coalescing multiple messages turns out to significantly boost message processing throughput (think doubling or more).
+# On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in
+# virtualized environments, the point at which an application can be bound by network packet processing can be
+# surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal
+# doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process
+# is sufficient for many applications such that no load starvation is experienced even without coalescing.
+# There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages
+# per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one
+# trip to read from a socket, and all the task submission work can be done at the same time reducing context switching
+# and increasing cache friendliness of network message processing.
+# See CASSANDRA-8692 for details.
+
+# Strategy to use for coalescing messages in OutboundTcpConnection.
+# Can be fixed, movingaverage, timehorizon, disabled (default).
+# You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name.
+# otc_coalescing_strategy: DISABLED
+
+# How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first
+# message is received before it will be sent with any accompanying messages. For moving average this is the
+# maximum amount of time that will be waited as well as the interval at which messages must arrive on average
+# for coalescing to be enabled.
+# otc_coalescing_window_us: 200
+
+# Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128.
+# otc_coalescing_enough_coalesced_messages: 8
+
+# How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection.
+# Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory
+# taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value
+# will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU
+# time and queue contention while iterating the backlog of messages.
+# An interval of 0 disables any wait time, which is the behavior of former Cassandra versions.
+#
+# otc_backlog_expiration_interval_ms: 200
\ No newline at end of file
diff --git a/etc/containerpilot.json5 b/etc/containerpilot.json5
index cfcb37e..fd342ad 100644
--- a/etc/containerpilot.json5
+++ b/etc/containerpilot.json5
@@ -1,7 +1,7 @@
 {
   consul: 'localhost:8500',
   logging: {
-    level: "DEBUG",
+    level: "INFO",
     format: "default"
   },
   jobs: [
@@ -34,9 +34,9 @@
       port: 9042,
       health: {
         exec: "nodetool -u {{ .CASSANDRA_USER }} -pw {{ .CASSANDRA_PASSWORD }} status",
-        interval: 5,
+        interval: 15,
         ttl: 15,
-        timeout: "10s"
+        timeout: "1m"
       },
       when: {
         source: 'cassandra-preStart',
diff --git a/etc/containerpilot_handler.py b/etc/containerpilot_handler.py
index 59356c9..5420ad0 100644
--- a/etc/containerpilot_handler.py
+++ b/etc/containerpilot_handler.py
@@ -45,7 +45,7 @@ def main(args):
     # either enough seed nodes appeared in consul kv or we managed to add ourselves and grab the lock
 
     # render our template in case there are existing seeds
-    log('rendering configuration during preStart')
+    log('rendering configuration during preStart, seeds: {}'.format(str(current_seeds)))
 
     # attempting to render the config immediately can result in our own volunteering being omitted
     # TODO: figure out what consul-template config would work like this (and not block indefinitely)
diff --git a/etc/containerpilot_handler/cassandra.py b/etc/containerpilot_handler/cassandra.py
index 7459cb6..0bf4482 100644
--- a/etc/containerpilot_handler/cassandra.py
+++ b/etc/containerpilot_handler/cassandra.py
@@ -118,10 +118,9 @@ def register_as_seed(self, seeds):
 
     seeds.append(own_ip)
 
-    return self.consul.kv.put(self.build_seeds_key(), ','.join(seeds), acquire=self.session_id)
+    return self.consul.kv.put(self.build_seeds_key(), ','.join(seeds))
 
   def render_config(self):
-    log('SO CLOSE')
     check_call([
       'consul-template', '-once', '-template', '/etc/cassandra/cassandra.yaml.ctmpl:/etc/cassandra/cassandra.yaml'])
     log('template rendered to: {}'.format('/etc/cassandra/cassandra.yaml'))
diff --git a/examples/compose/local-compose.yml b/examples/compose/local-compose.yml
index 7c860d2..2662612 100644
--- a/examples/compose/local-compose.yml
+++ b/examples/compose/local-compose.yml
@@ -6,7 +6,7 @@ services:
     build: ../../
     image: autopilotpattern/cassandra:latest
     restart: always
-    mem_limit: 2g
+    mem_limit: 512m
     dns: 127.0.0.1
     # uncomment the following lines for more rapid development
     volumes:

From 28b890ea526c5d6d7d01fd00acebe259e6b80307 Mon Sep 17 00:00:00 2001
From: Tomas Celaya <tjcelaya@gmail.com>
Date: Fri, 15 Dec 2017 16:17:48 -0800
Subject: [PATCH 7/7] Fix session expiration issue

---
 Dockerfile                                    | 10 ++--
 README.md                                     |  7 ++-
 etc/containerpilot.json5                      |  2 +-
 etc/containerpilot_handler.py                 |  6 +--
 etc/containerpilot_handler/cassandra.py       | 25 ++++++----
 etc/containerpilot_handler/utils.py           |  4 ++
 ...se.yml => docker-compose-multi-region.yml} | 46 ++++++++++++++-----
 examples/compose/docker-compose.yml           | 40 ++++++++++++++++
 examples/triton/docker-compose.yaml           |  9 +---
 makefile                                      |  6 +--
 10 files changed, 115 insertions(+), 40 deletions(-)
 rename examples/compose/{local-compose.yml => docker-compose-multi-region.yml} (54%)
 create mode 100644 examples/compose/docker-compose.yml

diff --git a/Dockerfile b/Dockerfile
index 833ed80..483a007 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,9 @@
 FROM cassandra:3.11.0
 
+# trying to use cqlsh to interact with cassandra since installing cassandra-driver takes _forever_
+# if the pip module is needed the package is:
+# cassandra-driver==3.12.0
+
 # install wget unzip and dig plus python modules
 RUN set -ex \
     && apt-get update \
@@ -7,8 +11,6 @@ RUN set -ex \
     && wget --quiet -O /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
     && python /tmp/get-pip.py \
     && pip install \
-        # trying to use cqlsh to do this stuff, installing cassandra-driver takes _forever_
-        # cassandra-driver==3.12.0 \
         python-Consul==0.7.2 \
         manta==2.6.0 \
         pyyaml==3.12 \
@@ -59,7 +61,7 @@ COPY etc/containerpilot_handler /usr/local/bin/containerpilot_handler
 COPY etc/containerpilot_handler.py /usr/local/bin/containerpilot_handler.py
 # COPY etc/cassandra.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
 
-# the following COPY should be used for minimal-memory installations (as low as 256m?)
+# the following COPY should be used for minimal-memory installations, potentially as low as 256m
 COPY etc/cassandra.tiny.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
 
 # disable the automatic seed configuration that enables single-node bootstrapping
@@ -68,7 +70,7 @@ COPY etc/cassandra.tiny.yaml.ctmpl /etc/cassandra/cassandra.yaml.ctmpl
 RUN sed -ri '/CASSANDRA_SEEDS.*CASSANDRA_BROADCAST_ADDRESS/d' /docker-entrypoint.sh && \
     sed -ri '/sed -ri.*CASSANDRA_SEEDS.*\/cassandra.yaml/d' /docker-entrypoint.sh
 
-# TODO: uncomment for tiny cassandra nodes (don't forget to change the COPY above to cassandra.tiny.yaml.ctmpl)
+# TODO: uncomment for tiny cassandra nodes, don't forget to change the COPY above to cassandra.tiny.yaml.ctmpl
 RUN sed -ri 's/^#MAX_HEAP_SIZE.*/MAX_HEAP_SIZE="64M"/' /etc/cassandra/cassandra-env.sh && \
     sed -ri 's/^#HEAP_NEWSIZE.*/HEAP_NEWSIZE="12M"/' /etc/cassandra/cassandra-env.sh
 
diff --git a/README.md b/README.md
index 94db97a..c482c8f 100644
--- a/README.md
+++ b/README.md
@@ -29,4 +29,9 @@ docker-compose exec cassandra cqlsh cassandra
 cqlsh> CREATE KEYSPACE demo WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2 };
 USE demo;
 
-```
\ No newline at end of file
+```
+
+
+# Credits
+
+- Minimal Cassandra configuration based on John Berryman's [Building the Perfect Cassandra Test Environment](http://opensourceconnections.com/blog/2013/08/31/building-the-perfect-cassandra-test-environment/)
diff --git a/etc/containerpilot.json5 b/etc/containerpilot.json5
index fd342ad..20a6729 100644
--- a/etc/containerpilot.json5
+++ b/etc/containerpilot.json5
@@ -1,5 +1,5 @@
 {
-  consul: 'localhost:8500',
+  consul: '{{ if .CONSUL_AGENT }}localhost{{ else }}{{ .CONSUL }}{{ end }}:{{ .CONSUL_PORT | default "8500" }}',
   logging: {
     level: "INFO",
     format: "default"
diff --git a/etc/containerpilot_handler.py b/etc/containerpilot_handler.py
index 5420ad0..7075202 100644
--- a/etc/containerpilot_handler.py
+++ b/etc/containerpilot_handler.py
@@ -26,7 +26,7 @@ def main(args):
 
   log('node configuration: {}'.format(node))
 
-  current_seeds = node.query_seeds()
+  seeds_modify_idx, current_seeds = node.query_seeds()
 
   if 'fakeBoot' in args:
     log('pretending to boot')
@@ -37,9 +37,9 @@ def main(args):
 
   if 'preStart' in args:
     # loop while we try to grab a lock on the seeds list
-    while not node.enough_seeds_exist(current_seeds) and not node.register_as_seed(current_seeds):
+    while not node.enough_seeds_exist(current_seeds) and not node.register_as_seed(current_seeds, seeds_modify_idx):
       sleep(5)
-      current_seeds = node.query_seeds()
+      seeds_modify_idx, current_seeds = node.query_seeds()
       log('waiting for seeds lock, current seed list: {}'.format(str(current_seeds)))
 
     # either enough seed nodes appeared in consul kv or we managed to add ourselves and grab the lock
diff --git a/etc/containerpilot_handler/cassandra.py b/etc/containerpilot_handler/cassandra.py
index 0bf4482..d2832e6 100644
--- a/etc/containerpilot_handler/cassandra.py
+++ b/etc/containerpilot_handler/cassandra.py
@@ -1,6 +1,7 @@
 from __future__ import print_function
 from os.path import exists
 from sys import stderr
+from consul.base import NotFound
 from socket import gethostname, gethostbyname
 from datetime import datetime
 from containerpilot_handler.utils import log
@@ -24,7 +25,11 @@ def __init__(self, consul, storage, home, user, password, datacenter, cluster_na
     self.cluster_name = cluster_name
 
     self.session_id = self.load_or_create_session()
-    self.persist_session()
+    try:
+      self.persist_session()
+    except NotFound as e:
+      self.session_id = self.load_or_create_session(forceNew=True)
+      self.persist_session()
 
   def __str__(self):
     return 'Cassandra <id={}, consul={}, storage={}, user={}, datacenter={}, cluster_name={} session_id={}>'.format(
@@ -36,11 +41,13 @@ def build_seeds_key(self):
   def build_snapshot_key(self):
     return 'cassandra-snapshot-{}-{}'.format(self.datacenter, gethostname())
 
-  def load_or_create_session(self):
-    if exists(Cassandra.FILE_SESSION_ID):
+  def load_or_create_session(self, forceNew=False):
+    if exists(Cassandra.FILE_SESSION_ID) and not forceNew:  # reuse the cached session unless a fresh one is forced
       log('found session file')
       with open(Cassandra.FILE_SESSION_ID, 'r') as session_file:
         return session_file.read()
+    else:
+      log('skipping cached session load')
 
     log('creating new session')
     return self.consul.session.create(self.id, behavior='delete', ttl=120)
@@ -61,14 +68,14 @@ def query_snapshot_state(self):
     return snapshot['Value']
 
   def query_seeds(self):
-    _, seeds = self.consul.kv.get(self.build_seeds_key())
+    midx, seeds = self.consul.kv.get(self.build_seeds_key())
     if seeds is None:
-      return None
+      return midx, None
 
     if seeds['Value'] is None:
-      return []
+      return midx, []
 
-    return seeds['Value'].split(',')
+    return midx, seeds['Value'].split(',')
 
   def read_saved_seeds(self, should_retry=True):
     loaded_conf = None
@@ -110,7 +117,7 @@ def already_registered_as_seed(self, seeds):
 
     return gethostbyname(gethostname()) in [s.strip() for s in seeds]
 
-  def register_as_seed(self, seeds):
+  def register_as_seed(self, seeds, modify_index):
     if seeds is None:
       seeds = []
 
@@ -118,7 +125,7 @@ def register_as_seed(self, seeds):
 
     seeds.append(own_ip)
 
-    return self.consul.kv.put(self.build_seeds_key(), ','.join(seeds))
+    return self.consul.kv.put(self.build_seeds_key(), ','.join(seeds), cas=modify_index)
 
   def render_config(self):
     check_call([
diff --git a/etc/containerpilot_handler/utils.py b/etc/containerpilot_handler/utils.py
index 5f13090..8494d73 100644
--- a/etc/containerpilot_handler/utils.py
+++ b/etc/containerpilot_handler/utils.py
@@ -49,6 +49,10 @@ def resolve_datacenter(c):
   if 'CASSANDRA_DC' in environ:
     return environ['CASSANDRA_DC']
 
+  # TODO: figure out what priority this mdata-get call should have relative to Consul
+  # if exists('/native/usr/sbin/mdata-get') and consul is None:
+  #   return check_output('/native/usr/sbin/mdata-get sdc:datacenter_name')
+
   if not isinstance(c, Consul):
     raise ValueError('unexpected type for consul instance when resolving datacenter: {}'.format(type(c)))
 
diff --git a/examples/compose/local-compose.yml b/examples/compose/docker-compose-multi-region.yml
similarity index 54%
rename from examples/compose/local-compose.yml
rename to examples/compose/docker-compose-multi-region.yml
index 2662612..5082754 100644
--- a/examples/compose/local-compose.yml
+++ b/examples/compose/docker-compose-multi-region.yml
@@ -2,7 +2,7 @@ version: '2.1'
 # Cassandra demonstration of the Autopilot pattern
 
 services:
-  cassandra:
+  cassandra-dc1:
     build: ../../
     image: autopilotpattern/cassandra:latest
     restart: always
@@ -16,23 +16,45 @@ services:
       - ../../tmp:/tmp/snapshots
     environment:
       - CONSUL=consuldc1
-      - CASSANDRA_USER=cassandra
-      - CASSANDRA_PASSWORD=cassandra
+      - CASSANDRA_USER=c
+      - CASSANDRA_PASSWORD=c
       - CASSANDRA_CLUSTER_NAME=demo
-      - CASSANDRA_KEYSPACES=demo
-      - CASSANDRA_TOPOLOGY={"demo":{"datacenter1":1}}
-      - CASSANDRA_ENDPOINT_SNITCH=SimpleSnitch
       - SNAPSHOT_TARGET=file:///tmp/snapshots
       # the following options pertain to multi-datacenter deployments
       # - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch
       # - CASSANDRA_DC=
       # - CASSANDRA_RACK=
     links:
-      - consuldc1:consul
+      - consul-dc1:consul
 
-  consuldc1:
+  cassandra-dc2:
+    build: ../../
+    image: autopilotpattern/cassandra:latest
+    restart: always
+    mem_limit: 512m
+    dns: 127.0.0.1
+    # uncomment the following lines for more rapid development
+    volumes:
+      - ../../etc/containerpilot_handler:/usr/local/bin/containerpilot_handler
+      - ../../etc/containerpilot_handler.py:/usr/local/bin/containerpilot_handler.py
+      - ../../etc/cassandra.yaml.ctmpl:/etc/cassandra/cassandra.yaml.ctmpl
+      - ../../tmp:/tmp/snapshots
+    environment:
+      - CONSUL=consuldc1
+      - CASSANDRA_USER=c
+      - CASSANDRA_PASSWORD=c
+      - CASSANDRA_CLUSTER_NAME=demo
+      - SNAPSHOT_TARGET=file:///tmp/snapshots
+      # the following options pertain to multi-datacenter deployments
+      # - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch
+      # - CASSANDRA_DC=
+      # - CASSANDRA_RACK=
+    links:
+      - consul-dc1:consul
+
+  consul-dc1:
     build: .
-    image: autopilotpattern/consul:${TAG:-latest}
+    image: autopilotpattern/consul:latest
     restart: always
     mem_limit: 128m
     ports:
@@ -43,8 +65,8 @@ services:
     command: >
       /usr/local/bin/containerpilot
 
-  consuldc2:
-    image: autopilotpattern/consul:${TAG:-latest}
+  consul-dc2:
+    image: autopilotpattern/consul:latest
     restart: always
     mem_limit: 128m
     ports:
@@ -56,4 +78,4 @@ services:
     command: >
       /usr/local/bin/containerpilot
     links:
-      - consuldc1
+      - consul-dc1
diff --git a/examples/compose/docker-compose.yml b/examples/compose/docker-compose.yml
new file mode 100644
index 0000000..050e573
--- /dev/null
+++ b/examples/compose/docker-compose.yml
@@ -0,0 +1,40 @@
+version: '2.1'
+# Cassandra demonstration of the Autopilot pattern
+
+services:
+  cassandra:
+    build: ../../
+    image: tjcelaya/cassandra
+    restart: always
+    mem_limit: 512m
+    dns: 127.0.0.1
+    # uncomment the following lines for more rapid development
+    volumes:
+      - ../../etc/containerpilot_handler:/usr/local/bin/containerpilot_handler
+      - ../../etc/containerpilot_handler.py:/usr/local/bin/containerpilot_handler.py
+      - ../../etc/cassandra.yaml.ctmpl:/etc/cassandra/cassandra.yaml.ctmpl
+      - ../../tmp:/tmp/snapshots
+    environment:
+      - CONSUL_AGENT=1
+      - CONSUL=consul
+      - CASSANDRA_USER=c
+      - CASSANDRA_PASSWORD=c
+      - CASSANDRA_CLUSTER_NAME=demo
+      - CASSANDRA_ENDPOINT_SNITCH=SimpleSnitch
+      - SNAPSHOT_TARGET=file:///tmp/snapshots
+    links:
+      - consul:consul
+
+  consul:
+    build: .
+    image: tjcelaya/consul
+    restart: always
+    mem_limit: 128m
+    ports:
+        - 8500
+    environment:
+      - CONSUL_DEV=1
+      - CONSUL=consul
+      - CONSUL_DATACENTER_NAME=dc1
+    command: >
+      /usr/local/bin/containerpilot
diff --git a/examples/triton/docker-compose.yaml b/examples/triton/docker-compose.yaml
index 62bc8a6..024264a 100644
--- a/examples/triton/docker-compose.yaml
+++ b/examples/triton/docker-compose.yaml
@@ -27,13 +27,8 @@ services:
     mem_limit: 128m
     network_mode: bridge
     ports:
-      - 8500:8500
+      - 8500
     labels:
       - triton.cns.services=cassandra-consul
     command: >
-      /usr/local/bin/containerpilot
-      /bin/consul agent -server
-        -config-dir=/etc/consul
-        -log-level=err
-        -bootstrap-expect 1
-        -ui-dir /ui
+      /usr/local/bin/containerpilot
\ No newline at end of file
diff --git a/makefile b/makefile
index 05eccb3..9536f12 100644
--- a/makefile
+++ b/makefile
@@ -1,6 +1,6 @@
-DC := docker-compose -p autopilotpattern -f examples/compose/local-compose.yml
+DC := docker-compose
 
-CONSUL_ADDR := $(shell $(DC) ps consuldc1 | egrep -o '0.0.0.0:\d+' | head -1)
+CONSUL_ADDR := $(shell $(DC) ps consul | grep -Eo '0\.0\.0\.0:[0-9]+' | head -1)
 CONSUL_URL := $(shell echo "http://$(CONSUL_ADDR)")
 
 .PHONY: *
@@ -15,7 +15,7 @@ restart-cassandra:
 	$(DC) stop cassandra
 	$(DC) rm -vf cassandra
 	$(DC) build cassandra
-	$(DC) up -d --scale=cassandra=2 --scale=consuldc1=3 cassandra consuldc1
+	$(DC) up -d --scale=cassandra=1 --scale=consul=3 cassandra consul
 	$(DC) logs -f cassandra
 
 consul: