diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5d6e313..ba618d5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     runs-on: ${{ matrix.os }}
 
     steps:
@@ -25,11 +25,13 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+
       - name: Install dependencies
         shell: bash
         run: |
           python -m pip install --upgrade pip
           python -m pip install flake8 pytest
+
       - name: Lint with flake8
         shell: bash
         run: |
@@ -37,11 +39,52 @@ jobs:
           flake8 . --count  --select=E9,F63,F7,F82 --show-source --statistics
           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
           flake8 . --count --ignore=F401,W503,E203 --max-complexity=99 --max-line-length=127 --statistics
+
       - name: Install h5json
         shell: bash
         run: |
           pip install -e .
+
+      - name: Checkout HSDS 
+        uses: actions/checkout@v4
+        with:
+          repository: HDFGroup/hsds
+          path: ${{github.workspace}}/hsds
+
+      - name: Install HSDS
+        working-directory: ${{github.workspace}}/hsds
+        shell: bash
+        run: |
+          pip install -e .
+
+      - name: Start HSDS
+        shell: bash
+        working-directory: ${{github.workspace}}/hsds
+        run: |
+          mkdir hsds_root
+          mkdir hsds_root/hsds_bucket
+          cp admin/config/groups.default admin/config/groups.txt
+          cp admin/config/passwd.default admin/config/passwd.txt
+          hsds --root_dir hsds_root --host localhost --port 5101 --password_file admin/config/passwd.txt --logfile hs.log --loglevel DEBUG --config_dir=admin/config --count=4 &
+
+      - name: Wait for node startup
+        shell: bash
+        run: |
+          sleep 30
+      
+      - name: HSDS Setup
+        shell: bash
+        env:
+          ADMIN_PASSWORD: admin
+          ADMIN_USERNAME: admin
+        working-directory: ${{github.workspace}}/hsds
+        run: |
+          python tests/integ/setup_test.py
+
       - name: Run tests
         shell: bash
+        HS_ENDPOINT: http://localhost:5101
+        HS_USERNAME: test_user1
+        HS_PASSWORD: test
         run: |
           python testall.py
diff --git a/data/hdf5/dset_creationprop.h5 b/data/hdf5/dset_creationprop.h5
index ff5b7a7..12b7a32 100644
Binary files a/data/hdf5/dset_creationprop.h5 and b/data/hdf5/dset_creationprop.h5 differ
diff --git a/data/json/nullspace_dset.json b/data/json/nullspace_dset.json
deleted file mode 100644
index 8808f21..0000000
--- a/data/json/nullspace_dset.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-    "apiVersion": "1.1.0",
-    "datasets": {
-        "23d3e919-7b53-11e4-961d-3c15c2da029e": {
-            "alias": [
-                "/DS1"
-            ],
-            "shape": {
-                "class": "H5S_NULL"
-            },
-            "type": {
-                "base": "H5T_STD_I32LE",
-                "class": "H5T_INTEGER"
-            },
-            "value": null
-        }
-    },
-    "groups": {
-        "23d2e06b-7b53-11e4-9910-3c15c2da029e": {
-            "alias": [
-                "/"
-            ],
-            "links": [
-                {
-                    "class": "H5L_TYPE_HARD",
-                    "collection": "datasets",
-                    "id": "23d3e919-7b53-11e4-961d-3c15c2da029e",
-                    "title": "DS1"
-                }
-            ]
-        }
-    },
-    "root": "23d2e06b-7b53-11e4-9910-3c15c2da029e"
-}
diff --git a/pyproject.toml b/pyproject.toml
index bcba820..879e7ff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,6 @@ classifiers = [
     "Topic :: Software Development :: Build Tools",
     "License :: OSI Approved :: BSD License",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
@@ -17,19 +16,19 @@ classifiers = [
 ]
 authors = [{ "name" = "The HDF Group", "email" = "help@hdfgroup.org" }]
 keywords = ["json", "hdf5", "multidimensional array", "data", "datacube"]
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dependencies = [
-    "h5py >=3.10",
+    "h5py >= 3.10",
     "numpy >= 2.0; python_version>='3.9'",
     "jsonschema >=4.4.0",
     "tomli; python_version<'3.11'",
-    "numpy >=1.20,<2.0.0; python_version=='3.8'",
 ]
+
 dynamic = ["version"]
 
 [project.urls]
-Homepage = "https://hdf5-json.readthedocs.io"
-Documentation = "https://hdf5-json.readthedocs.io"
+Homepage = "https://support.hdfgroup.org/documentation/hdf5-json/latest/"
+Documentation = "https://support.hdfgroup.org/documentation/hdf5-json/latest/"
 Source = "https://github.com/HDFGroup/hdf5-json"
 "Bug Reports" = "https://github.com/HDFGroup/hdf5-json/issues"
 Social = "https://twitter.com/hdf5"
@@ -52,6 +51,9 @@ build-backend = "setuptools.build_meta"
 package-dir = { "" = "src" }
 packages = [
     "h5json",
+    "h5json.jsonstore",
+    "h5json.h5pystore",
+    "h5json.hsdsstore",
     "h5json.h5tojson",
     "h5json.jsontoh5",
     "h5json.schema",
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..b2f3e82
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[flake8]
+max-line-length = 120
+# E402: module level import not at top of file
+# C901: too complex
+# F401: unused exports are necessary in __init__.py
+ignore = E402, C901, F401
diff --git a/src/h5json/__init__.py b/src/h5json/__init__.py
index 704d241..d4a7f78 100644
--- a/src/h5json/__init__.py
+++ b/src/h5json/__init__.py
@@ -21,6 +21,14 @@
 from .hdf5dtype import getTypeResponse
 from .hdf5dtype import getItemSize
 from .hdf5dtype import createDataType
+from .objid import createObjId
+from .objid import getCollectionForId
+from .objid import isObjId
+from .objid import isS3ObjKey
+from .objid import getS3Key
+from .objid import getObjId
+from .objid import isSchema2Id
+from .objid import isRootObjId
 from .hdf5db import Hdf5db
 from . import _version
 
diff --git a/src/h5json/array_util.py b/src/h5json/array_util.py
new file mode 100644
index 0000000..cb39cd5
--- /dev/null
+++ b/src/h5json/array_util.py
@@ -0,0 +1,713 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import math
+import base64
+import binascii
+import numpy as np
+
+from .hdf5dtype import isVlen
+
+MAX_VLEN_ELEMENT = 1_000_000  # restrict largest vlen element to one million
+
+
+def bytesArrayToList(data):
+    """
+    Convert list that may contain bytes type elements to list of string elements
+
+    TBD: Need to deal with non-string byte data (hexencode?)
+    """
+    if type(data) in (bytes, str):
+        is_list = False
+    elif isinstance(data, (np.ndarray, np.generic)):
+        if len(data.shape) == 0:
+            is_list = False
+            data = data.tolist()  # tolist will return a scalar in this case
+            if type(data) in (list, tuple, np.ndarray):
+                is_list = True
+            else:
+                is_list = False
+        else:
+            is_list = True
+    elif type(data) in (list, tuple):
+        is_list = True
+    else:
+        is_list = False
+    if is_list:
+        out = []
+        for item in data:
+            try:
+                rec_item = bytesArrayToList(item)  # recursive call
+                out.append(rec_item)
+            except ValueError as err:
+                raise err
+    elif type(data) is bytes:
+        try:
+            out = data.decode("utf-8")
+        except UnicodeDecodeError as err:
+            raise ValueError(err)
+    else:
+        out = data
+
+    return out
+
+
+def toTuple(rank, data, encoding=None):
+    """
+    Convert a list to a tuple, recursively.
+    Example. [[1,2],[3,4]] -> ((1,2),(3,4))
+    """
+    if type(data) in (list, tuple):
+        if rank > 0:
+            return list(toTuple(rank - 1, x) for x in data)
+        else:
+            return tuple(toTuple(rank - 1, x) for x in data)
+    else:
+        if encoding:
+            data = data.encode(encoding, "surrogateesacpe")
+        return data
+
+
+def getArraySize(arr):
+    """
+    Get size in bytes of a numpy array.
+    """
+    nbytes = arr.dtype.itemsize
+    for n in arr.shape:
+        nbytes *= n
+    return nbytes
+
+
+def getNumElements(dims):
+    """
+    Get num elements defined by a shape
+    """
+    num_elements = 0
+    if isinstance(dims, int):
+        num_elements = dims
+    elif isinstance(dims, (list, tuple)):
+        num_elements = 1
+        for dim in dims:
+            num_elements *= dim
+    else:
+        raise ValueError("Unexpected argument")
+    return num_elements
+
+
+def jsonToArray(data_shape, data_dtype, data_json):
+    """
+    Return numpy array from the given json array.
+    """
+
+    # print(f"jsonToArray - data_shape: {data_shape} dtype: {data_dtype} data: {data_json}")
+
+    def get_array(data, rank, dtype):
+        # helper function to create an array with encoding if needed
+        try:
+            arr = np.array(data, dtype=dtype)
+        except UnicodeEncodeError:
+            # Unable to encode data, encode as utf8 with surrogate escaping
+            data = toTuple(rank, data, encoding="utf8")
+            arr = np.array(data, dtype=dtype)
+        return arr
+
+    if data_json is None:
+        return np.array([]).astype(data_dtype)
+
+    if isinstance(data_json, (list, tuple)):
+        if None in data_json:
+            return np.array([]).astype(data_dtype)
+
+    # need some special conversion for compound types --
+    # each element must be a tuple, but the JSON decoder
+    # gives us a list instead.
+    if len(data_dtype) > 0 and not isinstance(data_json, (list, tuple)):
+        raise TypeError("expected list data for compound data type")
+    npoints = getNumElements(data_shape)
+    np_shape_rank = len(data_shape)
+
+    if type(data_json) in (list, tuple):
+        data_json = toTuple(np_shape_rank, data_json)
+
+    if isVlen(data_dtype):
+        # for vlen data we need to initialize of zero numpy array to ensure the right shape
+        arr = np.zeros(data_shape, dtype=data_dtype)
+        arr[...] = data_json
+    else:
+        try:
+            arr = get_array(data_json, np_shape_rank, data_dtype)
+        except ValueError:
+            if npoints <= 1 and isinstance(data_json, list):
+                # try converting data to a tuple
+                arr = get_array(tuple(data_json), np_shape_rank, data_dtype)
+            else:
+                raise
+
+    # raise an exception of the array shape doesn't match the selection shape
+    # allow if the array is a scalar and the selection shape is one element,
+    # numpy is ok with this
+    if arr.size != npoints:
+        msg = "Input data doesn't match selection number of elements"
+        msg += f" Expected {npoints}, but received: {arr.size}"
+        # try adding an extra dimension to data_json
+        # for cases where e.g. compound types are not getting interpreted correctly
+        data_json = toTuple(np_shape_rank, [data_json, ])
+        arr = get_array(data_json, np_shape_rank, data_dtype)
+        if arr.size != npoints:
+            # still no good, raise error
+            raise ValueError(msg)
+
+    if arr.shape != tuple(data_shape):
+        arr = arr.reshape(tuple(data_shape))
+
+    return arr
+
+
+def getElementSize(e, dt):
+    """
+    Get number of byte needed to given element as a bytestream
+    """
+    # print(f"getElementSize - e: {e}  dt: {dt} metadata: {dt.metadata}")
+    if len(dt) > 1:
+        count = 0
+        for name in dt.names:
+            field_dt = dt[name]
+            field_val = e[name]
+            count += getElementSize(field_val, field_dt)
+    elif not dt.base.metadata or "vlen" not in dt.base.metadata:
+        count = dt.itemsize  # fixed size element
+    else:
+        # variable length element
+        vlen = dt.base.metadata["vlen"]
+        if isinstance(e, int):
+            if e == 0:
+                count = 4  # non-initialized element
+            else:
+                raise ValueError(f"Unexpected value: {e}")
+        elif isinstance(e, bytes):
+            count = len(e) + 4
+        elif isinstance(e, str):
+            count = len(e.encode("utf-8")) + 4
+        elif isinstance(e, np.ndarray):
+            nElements = math.prod(e.shape)
+            if e.dtype.kind != "O":
+                count = e.dtype.itemsize * nElements
+            else:
+                arr1d = e.reshape((nElements,))
+                count = 0
+                for item in arr1d:
+                    count += getElementSize(item, dt)
+            count += 4  # byte count
+        elif isinstance(e, list) or isinstance(e, tuple):
+            if not e:
+                # empty list, just add byte count
+                count = 4
+            else:
+                # not sure how to deal with this
+                count = len(e) * vlen.itemsize + 4  # +4 for byte count
+        else:
+            raise TypeError("unexpected type: {}".format(type(e)))
+    # print("getElementSize returning:", count)
+    return count
+
+
+def getByteArraySize(arr):
+    """
+    Get number of bytes needed to store given numpy array as a bytestream
+    """
+    if not isVlen(arr.dtype):
+        return arr.itemsize * math.prod(arr.shape)
+    nElements = math.prod(arr.shape)
+    # reshape to 1d for easier iteration
+    arr1d = arr.reshape((nElements,))
+    dt = arr1d.dtype
+    count = 0
+    for e in arr1d:
+        count += getElementSize(e, dt)
+    return count
+
+
+def copyBuffer(src, des, offset):
+    """
+    Copy to buffer at given offset
+    """
+    # print(f"copyBuffer - src: {src} offset: {offset}")
+    # TBD: just do: des[offset:] = src[:]  ?
+    for i in range(len(src)):
+        des[i + offset] = src[i]
+
+    # print("returning:", offset + len(src))
+    return offset + len(src)
+
+
+def copyElement(e, dt, buffer, offset):
+    """
+    Copy element to bytearray
+    """
+
+    # print(f"copyElement - dt: {dt}  offset: {offset}")
+    if len(dt) > 1:
+        for name in dt.names:
+            field_dt = dt[name]
+            field_val = e[name]
+            offset = copyElement(field_val, field_dt, buffer, offset)
+    elif not dt.base.metadata or "vlen" not in dt.base.metadata:
+        # print(f"no vlen: {e} type: {type(e)} e.dtype: {e.dtype} itemsize: {dt.itemsize}")
+        e_buf = np.asarray(e, dtype=dt).tobytes()
+        if len(e_buf) < dt.itemsize:
+            # extend the buffer for fixed size strings
+            e_buf_ex = bytearray(dt.itemsize)
+            for i in range(len(e_buf)):
+                e_buf_ex[i] = e_buf[i]
+            e_buf = bytes(e_buf_ex)
+
+        offset = copyBuffer(e_buf, buffer, offset)
+    else:
+        # variable length element
+        vlen = dt.base.metadata["vlen"]
+        if isinstance(e, int):
+            if e == 0:
+                # write 4-byte integer 0 to buffer
+                offset = copyBuffer(b"\x00\x00\x00\x00", buffer, offset)
+            else:
+                raise ValueError("Unexpected value: {}".format(e))
+        elif isinstance(e, bytes):
+            count = np.int32(len(e))
+            if count > MAX_VLEN_ELEMENT:
+                raise ValueError("vlen element too large")
+            offset = copyBuffer(count.tobytes(), buffer, offset)
+            offset = copyBuffer(e, buffer, offset)
+        elif isinstance(e, str):
+            text = e.encode("utf-8")
+            count = np.int32(len(text))
+            if count > MAX_VLEN_ELEMENT:
+                raise ValueError("vlen element too large")
+            offset = copyBuffer(count.tobytes(), buffer, offset)
+            offset = copyBuffer(text, buffer, offset)
+
+        elif isinstance(e, np.ndarray):
+            nElements = math.prod(e.shape)
+
+            if e.dtype.kind != "O":
+                count = np.int32(e.dtype.itemsize * nElements)
+                if count > MAX_VLEN_ELEMENT:
+                    raise ValueError("vlen element too large")
+                offset = copyBuffer(count.tobytes(), buffer, offset)
+                offset = copyBuffer(e.tobytes(), buffer, offset)
+            else:
+                arr1d = e.reshape((nElements,))
+                for item in arr1d:
+                    offset = copyElement(item, dt, buffer, offset)
+
+        elif isinstance(e, list) or isinstance(e, tuple):
+            # print("cooyBuffer list/tuple  vlen:", vlen, "e:", e)
+            count = np.int32(len(e) * vlen.itemsize)
+            offset = copyBuffer(count.tobytes(), buffer, offset)
+            if isinstance(e, np.ndarray):
+                arr = e
+            else:
+                arr = np.asarray(e, dtype=vlen)
+            offset = copyBuffer(arr.tobytes(), buffer, offset)
+
+        else:
+            raise TypeError("unexpected type: {}".format(type(e)))
+    return offset
+
+
+def getElementCount(buffer, offset=0):
+    """
+    Get the count value from persisted vlen array
+    """
+
+    n = offset
+    m = offset + 4
+    count_bytes = bytes(buffer[n:m])
+
+    try:
+        count = int(np.frombuffer(count_bytes, dtype="<i4")[0])
+    except TypeError as e:
+        msg = f"Unexpected error reading count value for varlen element: {e}"
+        raise TypeError(msg)
+    if count < 0:
+        # shouldn't be negative
+        raise ValueError(f"Unexpected count value for varlen element: {count}")
+    if count > MAX_VLEN_ELEMENT:
+        # expect variable length element to be between 0 and 1mb
+        raise ValueError("varlen element size expected to be less than 1MB")
+    return count
+
+
+def readElement(buffer, offset, arr, index, dt):
+    """
+    Read a single element from buffer into array.
+
+    Parameters:
+        buffer (bytearray): Byte array to read an element from.
+        offset (int): Starting offset in the buffer.
+        arr (numpy.ndarray): Array to store the element.
+        index (int): Index in 'arr' at which to store the element.
+        dt (numpy.dtype): Numpy datatype of the element.
+
+    Note: If the provided datatype is a variable-length sequence,
+    this function will read the byte count from the first 4 bytes
+    of the buffer, and then read the entire sequence.
+
+    Returns:
+        int: The updated offset value after reading the element.
+    """
+    # print("readElement, offset:", offset)
+    if len(dt) > 1:
+        e = arr[index]
+        for name in dt.names:
+            field_dt = dt[name]
+            offset = readElement(buffer, offset, e, name, field_dt)
+    elif not dt.base.metadata or "vlen" not in dt.base.metadata:
+        count = dt.itemsize
+        n = offset
+        m = offset + count
+        e_buffer = buffer[n:m]
+        offset += count
+        try:
+            e = np.frombuffer(bytes(e_buffer), dtype=dt)
+            arr[index] = e[0]
+
+        except ValueError:
+            # print(f"ValueError setting {e_buffer} and dtype: {dt}")
+            raise
+    else:
+        # variable length element
+        vlenBaseType = dt.base.metadata["vlen"]
+        e = arr[index]
+
+        if isinstance(e, np.ndarray):
+            nelements = math.prod(dt.shape)
+            e.reshape((nelements,))
+            for i in range(nelements):
+                offset = readElement(buffer, offset, e, i, dt)
+            e.reshape(dt.shape)
+        else:
+            # total number of bytes in the vlen sequence/variable-length string
+            count = getElementCount(buffer, offset=offset)
+            offset += 4
+            n = offset
+            m = offset + count
+            if count > 0:
+                e_buffer = buffer[n:m]
+                offset += count
+
+                if vlenBaseType is bytes:
+                    arr[index] = bytes(e_buffer)
+                elif vlenBaseType is str:
+                    s = e_buffer.decode("utf-8")
+                    arr[index] = s
+                else:
+                    try:
+                        e = np.frombuffer(bytes(e_buffer), dtype=vlenBaseType)
+                    except ValueError:
+                        msg = f"Failed to parse vlen data: {e_buffer} with dtype: {vlenBaseType}"
+                        raise ValueError(msg)
+                    arr[index] = e
+    return offset
+
+
+def encodeData(data, encoding="base64"):
+    """ Encode given data """
+    if encoding != "base64":
+        raise ValueError("only base64 encoding is supported")
+    try:
+        if isinstance(data, str):
+            data = data.encode("utf8")
+    except UnicodeEncodeError:
+        raise ValueError("can not encode string value")
+    if not isinstance(data, bytes):
+        msg = "Expected str or bytes type to encodeData, "
+        msg += f"but got: {type(data)}"
+        raise TypeError(msg)
+    try:
+        encoded_data = base64.b64encode(data)
+    except Exception as e:
+        # TBD: what exceptions can be raised?
+        raise ValueError(f"Unable to encode: {e}")
+    return encoded_data
+
+
+def decodeData(data, encoding="base64"):
+    if encoding != "base64":
+        raise ValueError("only base64 decoding is supported")
+    try:
+        decoded_data = base64.b64decode(data)
+    except Exception as e:
+        # TBD: catch actual exception
+        raise ValueError(f"Unable to decode: {e}")
+    return decoded_data
+
+
+def arrayToBytes(arr, encoding=None):
+    """
+    Return byte representation of numpy array
+    """
+
+    if isVlen(arr.dtype):
+        nSize = getByteArraySize(arr)
+        buffer = bytearray(nSize)
+        offset = 0
+        nElements = math.prod(arr.shape)
+        arr1d = arr.reshape((nElements,))
+        for e in arr1d:
+            offset = copyElement(e, arr1d.dtype, buffer, offset)
+        data = bytes(buffer)
+    else:
+        # fixed length type
+        data = arr.tobytes()
+
+    if encoding:
+        data = encodeData(data)
+    return data
+
+
+def bytesToArray(data, dt, shape, encoding=None):
+    """
+    Create numpy array based on byte representation
+    """
+    if encoding:
+        # decode the data
+        # will raise ValueError if non-decodable
+        data = decodeData(data)
+    if not isVlen(dt):
+        # regular numpy from string
+        arr = np.frombuffer(data, dtype=dt)
+    else:
+        nElements = getNumElements(shape)
+
+        arr = np.zeros((nElements,), dtype=dt)
+        offset = 0
+        for index in range(nElements):
+            offset = readElement(data, offset, arr, index, dt)
+    if shape is not None:
+        arr = arr.reshape(shape)
+    # check that we can update the array if needed
+    # Note: this seems to have been required starting with numpuy v 1.17
+    # Setting the flag directly is not recommended.
+    # cf: https://github.com/numpy/numpy/issues/9440
+
+    if not arr.flags["WRITEABLE"]:
+        arr_copy = arr.copy()
+        arr = arr_copy
+
+    return arr
+
+
+def getNumpyValue(value, dt=None, encoding=None):
+    """
+    Return value as numpy type for given dtype and encoding
+    Encoding is expected to be one of None or "base64"
+    """
+    # create a scalar numpy array
+    arr = np.zeros((), dtype=dt)
+
+    if encoding and not isinstance(value, str):
+        msg = "Expected value to be string to use encoding"
+        raise ValueError(msg)
+
+    if encoding == "base64":
+        try:
+            data = base64.decodebytes(value.encode("utf-8"))
+        except binascii.Error:
+            msg = "Unable to decode base64 string: {value}"
+            # log.warn(msg)
+            raise ValueError(msg)
+        arr = bytesToArray(data, dt, dt.shape)
+    else:
+        if isinstance(value, list):
+            # convert to tuple
+            value = tuple(value)
+        elif dt.kind == "f" and isinstance(value, str) and value == "nan":
+            value = np.nan
+        else:
+            # use as is
+            pass
+        arr = np.asarray(value, dtype=dt.base)
+    return arr[()]
+
+
+def squeezeArray(data):
+    """
+    Reduce dimensions by removing any 1-extent dimensions.
+    Just return input if no 1-extent dimensions
+
+    Note: only works with ndarrays (for now at least)
+    """
+    if not isinstance(data, np.ndarray):
+        raise TypeError("expected ndarray")
+    if len(data.shape) <= 1:
+        return data
+    can_reduce = True
+    for extent in data.shape:
+        if extent == 1:
+            can_reduce = True
+        break
+    if can_reduce:
+        data = data.squeeze()
+    return data
+
+
+class IndexIterator(object):
+    """
+    Class to iterate through list of chunks of a given dataset
+    """
+
+    def __init__(self, shape, sel=None):
+        self._shape = shape
+        self._rank = len(self._shape)
+        self._stop = False
+
+        if self._rank < 1:
+            raise ValueError("IndexIterator can not be used on arrays of zero rank")
+
+        if sel is None:
+            # select over entire dataset
+            slices = []
+            for dim in range(self._rank):
+                slices.append(slice(0, self._shape[dim]))
+            self._sel = tuple(slices)
+        else:
+            if isinstance(sel, slice):
+                self._sel = (sel,)
+            else:
+                self._sel = sel
+        if len(self._sel) != self._rank:
+            raise ValueError("Invalid selection - selection region must have same rank as shape")
+        self._index = []
+        for dim in range(self._rank):
+            s = self._sel[dim]
+            if s.start < 0 or s.stop > self._shape[dim] or s.stop <= s.start:
+                raise ValueError(
+                    "Invalid selection - selection region must be within dataset space"
+                )
+            self._index.append(s.start)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self._stop:
+            raise StopIteration()
+        # bump up the last index and carry forward if we run outside the selection
+        dim = self._rank - 1
+        ret_index = self._index.copy()
+        while True:
+            s = self._sel[dim]
+            if s.step:
+                step = s.step
+            else:
+                step = 1
+            self._index[dim] += step
+
+            if self._index[dim] < s.stop:
+                # we still have room to extend along this dimensions
+                break
+
+            # reset to the start and continue iterating with higher dimension
+            self._index[dim] = s.start
+            dim -= 1
+            if dim < 0:
+                # ran past last index, stop iteration on next run
+                self._stop = True
+
+        return tuple(ret_index)
+
+
+def ndarray_compare(arr1, arr2):
+    # compare two numpy arrays.
+    # return true if the same (exclusive of null vs. empty array)
+    # false otherwise
+    # TBD: this is slow for multi-megabyte vlen arrays, needs to be optimized
+    if not isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray):
+        if not isinstance(arr1, np.void) and not isinstance(arr2, np.void):
+            return arr1 == arr2
+        if isinstance(arr1, np.void) and not isinstance(arr2, np.void):
+            if arr1.size == 0 and not arr2:
+                return True
+            else:
+                return False
+        if not isinstance(arr1, np.void) and isinstance(arr2, np.void):
+            if not arr1 and arr2.size == 0:
+                return True
+            else:
+                return False
+        # both np.voids
+        if arr1.size != arr2.size:
+            return False
+
+        if len(arr1) != len(arr2):
+            return False
+
+        for i in range(len(arr1)):
+            if not ndarray_compare(arr1[i], arr2[i]):
+                return False
+        return True
+
+    if isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray):
+        # same only if arr1 is empty and arr2 is 0
+        if arr1.size == 0 and not arr2:
+            return True
+        else:
+            return False
+    if not isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray):
+        # same only if arr1 is empty and arr2 size is 0
+        if not arr1 and arr2.size == 0:
+            return True
+        else:
+            return False
+
+    # two ndarrays...
+    if arr1.shape != arr2.shape:
+        return False
+    if arr2.dtype != arr2.dtype:
+        return False
+
+    if isVlen(arr1.dtype):
+        # need to compare element by element
+
+        nElements = np.prod(arr1.shape)
+        arr1 = arr1.reshape((nElements,))
+        arr2 = arr2.reshape((nElements,))
+        for i in range(nElements):
+            if not ndarray_compare(arr1[i], arr2[i]):
+                return False
+        return True
+    else:
+        # can just us np array_compare
+        return np.array_equal(arr1, arr2)
+
+
+def getBroadcastShape(mshape, element_count):
+    # if element_count is less than the number of elements
+    # defined by mshape, return a numpy compatible broadcast
+    # shape that contains element_count elements.
+    # If non exists return None
+
+    if np.prod(mshape) == element_count:
+        return None
+
+    if element_count == 1:
+        # this always works
+        return [1,]
+
+    bcshape = []
+    rank = len(mshape)
+    for n in range(rank - 1):
+        bcshape.insert(0, mshape[rank - n - 1])
+        if element_count == np.prod(bcshape):
+            return bcshape  # have a match
+
+    return None  # no broadcast found
diff --git a/src/h5json/config.py b/src/h5json/config.py
new file mode 100755
index 0000000..b7602ff
--- /dev/null
+++ b/src/h5json/config.py
@@ -0,0 +1,213 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import os
+import json
+
+
+class Config:
+    """
+    User Config state
+    """
+    _cfg = {}  # global state
+
+    def __init__(self, config_file=None, **kwargs):
+        if Config._cfg:
+            return  # already initialized
+        if config_file:
+            self._config_file = config_file
+        elif os.path.isfile(".hscfg"):
+            self._config_file = ".hscfg"
+        else:
+            self._config_file = os.path.expanduser("~/.hscfg")
+        # process config file if found
+        if os.path.isfile(self._config_file):
+            line_number = 0
+            with open(self._config_file) as f:
+                for line in f:
+                    line_number += 1
+                    s = line.strip()
+                    if not s:
+                        continue
+                    if s[0] == '#':
+                        # comment line
+                        continue
+                    fields = s.split('=')
+                    if len(fields) < 2:
+                        print(f"config file: {self._config_file} line: {line_number} is not valid")
+                        continue
+                    k = fields[0].strip()
+                    v = fields[1].strip()
+                    if k == "complex_names":
+                        self.complex_names = v
+                    elif k == "bool_names":
+                        self.bool_names = v
+                    elif k == "track_order":
+                        self.track_order = v
+                    else:
+                        Config._cfg[k] = v
+
+        # add standard keys if not already picked up
+        for k in ("hs_endpoint", "hs_username", "hs_password", "hs_api_key"):
+            if k not in Config._cfg:
+                Config._cfg[k] = ""
+
+        # override any config values with environment variable if found
+        for k in Config._cfg.keys():
+            if k.upper() in os.environ:
+                Config._cfg[k] = os.environ[k.upper()]
+
+        # update any values that are passed in to the constructor
+        for k in kwargs.keys():
+            Config._cfg[k] = kwargs[k]
+
+        # finally, set defaults for any expected keys that are not already set
+        for k in ("hs_endpoint", "hs_username", "hs_endpoint"):
+            if k not in Config._cfg:
+                Config._cfg[k] = None
+        if "bool_names" not in Config._cfg:
+            Config._cfg["bool_names"] = (b"FALSE", b"TRUE")
+        if "complex_names" not in Config._cfg:
+            Config._cfg["complex_names"] = ("r", "i")
+        if "track_order" not in Config._cfg:
+            Config._cfg["track_order"] = False
+
+    def __getitem__(self, name):
+        """ Get a config item  """
+        if name not in Config._cfg:
+            if name.upper() in os.environ:
+                Config._cfg[name] = os.environ[name.upper()]
+            else:
+                return None
+        return Config._cfg[name]
+
+    def get(self, name, default):
+        """ return config value for name or default if None """
+        val = self.__getitem__(name)
+        if val is None:
+            return default
+        else:
+            return default
+
+    def __setitem__(self, name, obj):
+        """ set config item """
+        Config._cfg[name] = obj
+
+    def __delitem__(self, name):
+        """ Delete option. """
+        del Config._cfg[name]
+
+    def __len__(self):
+        return len(Config._cfg)
+
+    def __iter__(self):
+        """ Iterate over config names """
+        keys = Config._cfg.keys()
+        for key in keys:
+            yield key
+
+    def __contains__(self, name):
+        return name in Config._cfg
+
+    def __repr__(self):
+        return json.dumps(Config._cfg)
+
+    def keys(self):
+        return Config._cfg.keys()
+
+    @property
+    def hs_endpoint(self):
+        return Config._cfg.get("hs_endpoint")
+
+    @property
+    def hs_username(self):
+        return Config._cfg.get("hs_username")
+
+    @property
+    def hs_password(self):
+        return Config._cfg.get("hs_password")
+
+    @property
+    def hs_api_key(self):
+        return Config._cfg.get("hs_api_key")
+
+    @property
+    def bool_names(self):
+        if "bool_names" in Config._cfg:
+            names = Config._cfg["bool_names"]
+        else:
+            names = (b"FALSE", b"TRUE")
+        return names
+
+    @bool_names.setter
+    def bool_names(self, value):
+        if isinstance(value, str):
+            names = value.split(())
+            if len(names) < 2:
+                raise ValueError("bool_names must have two items")
+            elif len(names) == 2:
+                pass
+            else:
+                names = names[:2]  # just use the first two items
+        elif len(value) != 2:
+            raise ValueError("expected two-element list for bool_names")
+        else:
+            names = value
+        Config._cfg["bool_names"] = tuple(names)
+
+    @property
+    def complex_names(self):
+        if "complex_names" in Config._cfg:
+            names = Config._cfg["complex_names"]
+        else:
+            names = ("r", "i")
+        return names
+
+    @complex_names.setter
+    def complex_names(self, value):
+        if isinstance(value, str):
+            names = value.split()
+            if len(names) < 2:
+                raise ValueError("complex_names must have two items")
+            elif len(names) == 2:
+                pass
+            else:
+                names = names[:2]  # just use the first two items
+        elif len(value) != 2:
+            raise ValueError("complex_names must have two values")
+        else:
+            names = value
+
+        Config._cfg["complex_names"] = tuple(names)
+
+    @property
+    def track_order(self):
+        if "track_order" in Config._cfg:
+            track = Config._cfg["track_order"]
+        else:
+            track = False
+        return track
+
+    @track_order.setter
+    def track_order(self, value):
+        if isinstance(value, str):
+            tokens = value.split()
+            if len(tokens) == 0:
+                track = False
+            else:
+                track = bool(tokens[0])  # strip any comments
+        else:
+            track = bool(value)
+        Config._cfg["track_order"] = track
+
+
+def get_config(config_file=None, **kwargs):
+    return Config(config_file=config_file, **kwargs)
diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py
new file mode 100644
index 0000000..5b10323
--- /dev/null
+++ b/src/h5json/dset_util.py
@@ -0,0 +1,42 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 REST Server) Service, Libraries and        #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import time
+
+
+def resize_dataset(dset_json, shape):
+    shape_json = dset_json["shape"]
+    shape_class = shape_json["class"]
+    if shape_class != "H5S_SIMPLE":
+        raise TypeError(f"dataset with shape class: {shape_class} cannot be resized")
+    if len(shape_json["dims"]) != len(shape):
+        raise ValueError("Resize shape parameter doesn't match dataset's rank")
+    if "maxdims" not in shape_json:
+        raise ValueError("Dataset is not resizable")
+    dims = shape_json["dims"]
+    maxdims = shape_json["maxdims"]
+
+    if shape_json["dims"] == list(shape):
+        # no change, just return
+        return
+    for i in range(len(dims)):
+        extent = shape[i]
+        if extent < 0:
+            raise ValueError("dimensions can't be negative")
+        if maxdims[i] == "H5S_UNLIMITED":
+            # any positive extent is ok
+            continue
+        if extent > maxdims[i]:
+            raise ValueError(f"extent for dimension {i} can't be larger than {maxdims[i]}")
+
+    shape_json["dims"] = list(shape)
+    dset_json["modified"] = time.time()
diff --git a/src/h5json/filters.py b/src/h5json/filters.py
new file mode 100644
index 0000000..cda3817
--- /dev/null
+++ b/src/h5json/filters.py
@@ -0,0 +1,55 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import h5py
+
+_HDF_FILTERS = {
+    1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
+    2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
+    3: {"class": "H5Z_FILTER_FLETCHER32", "alias": "fletcher32"},
+    4: {
+        "class": "H5Z_FILTER_SZIP",
+        "alias": "szip",
+        "options": ["bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"],
+    },
+    5: {"class": "H5Z_FILTER_NBIT"},
+    6: {
+        "class": "H5Z_FILTER_SCALEOFFSET",
+        "alias": "scaleoffset",
+        "options": ["scaleType", "scaleOffset"],
+    },
+    32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
+}
+
+_HDF_FILTER_OPTION_ENUMS = {
+    "coding": {
+        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
+        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
+    },
+    "scaleType": {
+        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
+        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
+        h5py.h5z.SO_INT: "H5Z_SO_INT",
+    },
+}
+
+# h5py supported filters
+_H5PY_FILTERS = {
+    "gzip": 1,
+    "shuffle": 2,
+    "fletcher32": 3,
+    "szip": 4,
+    "scaleoffset": 6,
+    "lzf": 32000,
+}
+
+_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
diff --git a/src/h5json/h5py_util.py b/src/h5json/h5py_util.py
new file mode 100644
index 0000000..ebe2dbd
--- /dev/null
+++ b/src/h5json/h5py_util.py
@@ -0,0 +1,109 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import h5py
+import numpy as np
+
+from . import hdf5dtype
+
+
+def is_reference(val):
+    """ Return True if the type or value is a Reference """
+
+    if isinstance(val, object) and val.__class__.__name__ == "Reference":
+        return True
+    elif isinstance(val, type) and val.__name__ == "Reference":
+        return True
+    else:
+        return False
+
+
+def is_regionreference(val):
+    """ Return True if the type or value is a RegionReference """
+
+    if isinstance(val, object) and val.__class__.__name__ == "RegionReference":
+        return True
+    elif isinstance(val, type) and val.__name__ == "RegionReference":
+        return True
+
+    return False
+
+
+def has_reference(dtype):
+    """ return True if the dtype (or a sub-type) is a Reference type """
+    has_ref = False
+    if not isinstance(dtype, np.dtype):
+        return False
+    if len(dtype) > 0:
+        for name in dtype.fields:
+            item = dtype.fields[name]
+            if has_reference(item[0]):
+                has_ref = True
+                break
+    elif dtype.metadata and "ref" in dtype.metadata:
+        basedt = dtype.metadata["ref"]
+        has_ref = is_reference(basedt)
+    elif dtype.metadata and "vlen" in dtype.metadata:
+        basedt = dtype.metadata["vlen"]
+        has_ref = has_reference(basedt)
+    return has_ref
+
+
+def convert_dtype(srcdt, to_h5py=True):
+    """Return a dtype based on input dtype, converting any Reference types from
+    h5py style to h5json and vice-versa.
+    """
+
+    if len(srcdt) > 0:
+        fields = []
+        for name in srcdt.fields:
+            item = srcdt.fields[name]
+            # item is a tuple of dtype and integer offset
+            field_dt = convert_dtype(item[0], to_h5py=to_h5py)
+            fields.append((name, field_dt))
+        tgt_dt = np.dtype(fields)
+    else:
+        # check if this a "special dtype"
+        if srcdt.metadata and "ref" in srcdt.metadata:
+            ref = srcdt.metadata["ref"]
+            if is_reference(ref):
+                if to_h5py:
+                    tgt_dt = h5py.special_dtype(ref=h5py.Reference)
+                else:
+                    tgt_dt = hdf5dtype.special_dtype(ref=hdf5dtype.Reference)
+            elif is_regionreference(ref):
+                if to_h5py:
+                    tgt_dt = h5py.special_dtype(ref=h5py.RegionReference)
+                else:
+                    tgt_dt = hdf5dtype.special_dtype(ref=hdf5dtype.RegionReference)
+            else:
+                msg = f"Unexpected ref type: {srcdt}"
+                raise TypeError(msg)
+        elif srcdt.metadata and "vlen" in srcdt.metadata:
+            src_vlen = srcdt.metadata["vlen"]
+            if isinstance(src_vlen, np.dtype):
+                tgt_base = convert_dtype(src_vlen, to_h5py=to_h5py)
+            else:
+                tgt_base = src_vlen
+            if to_h5py:
+                tgt_dt = h5py.special_dtype(vlen=tgt_base)
+            else:
+                tgt_dt = hdf5dtype.special_dtype(vlen=tgt_base)
+        elif srcdt.kind == "U":
+            # use vlen for unicode strings
+            if to_h5py:
+                tgt_dt = h5py.special_dtype(vlen=str)
+            else:
+                tgt_dt = hdf5dtype.special_dtype(vlen=str)
+        else:
+            tgt_dt = srcdt
+    return tgt_dt
diff --git a/src/h5json/h5pystore/__init__.py b/src/h5json/h5pystore/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/h5json/h5pystore/h5py_reader.py b/src/h5json/h5pystore/h5py_reader.py
new file mode 100644
index 0000000..bc4b582
--- /dev/null
+++ b/src/h5json/h5pystore/h5py_reader.py
@@ -0,0 +1,516 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import h5py
+import numpy as np
+import logging
+
+from ..objid import createObjId, getCollectionForId
+from ..hdf5dtype import getTypeItem, isOpaqueDtype
+from ..array_util import bytesArrayToList
+from .. import selections
+from .. import filters
+
+from ..h5py_util import is_reference, is_regionreference, has_reference, convert_dtype
+from ..h5reader import H5Reader
+
+
+class H5pyReader(H5Reader):
+    """
+    This class can be used by HDF5DB to read content from an HDF5 file (using h5py)
+    """
+
+    def _copy_element(self, val, src_dt, tgt_dt, fin=None):
+        """ convert the given dataset or attribute element from h5py to h5json equivalent """
+
+        out = None
+        if len(src_dt) > 0:
+            out_fields = []
+            i = 0
+            for name in src_dt.fields:
+                field_src_dt = src_dt.fields[name][0]
+                field_tgt_dt = tgt_dt.fields[name][0]
+                field_val = val[i]
+                i += 1
+                out_field = self._copy_element(field_val, field_src_dt, field_tgt_dt, fin=fin)
+                out_fields.append(out_field)
+            out = tuple(out_fields)
+        elif src_dt.metadata and "ref" in src_dt.metadata:
+            if not tgt_dt.metadata or "ref" not in tgt_dt.metadata:
+                raise TypeError(f"Expected tgt dtype to be ref, but got: {tgt_dt}")
+            ref = tgt_dt.metadata["ref"]
+            if is_reference(ref):
+                # initialize out to null ref
+                out = h5py.Reference()  # null h5py ref
+
+                if ref and val:
+                    try:
+                        fin_obj = fin[val]
+                    except AttributeError as ae:
+                        msg = f"Unable able to get obj for ref value: {ae}"
+                        self.log.error(msg)
+                        raise ValueError(msg)
+
+                    addr = h5py.h5o.get_info(fin_obj.id).addr
+                    if addr not in self._addr_map:
+                        msg = f"No object found for ref object: {fin_obj.name}"
+                        self.log.warning(msg)
+                        out = ""
+                    else:
+                        obj_id = self._addr_map[addr]
+                        collection = getCollectionForId(obj_id)
+                        out = f"{collection}/{obj_id}"
+
+            elif is_regionreference(ref):
+                self.log.warning("region reference not supported")
+                # TBD: just return a null region reference till we have support
+                out = ""
+            else:
+                raise TypeError(f"Unexpected ref type: {type(ref)}")
+        elif src_dt.metadata and "vlen" in src_dt.metadata:
+            if not isinstance(val, np.ndarray):
+                raise TypeError(f"Expecting ndarray or vlen element, but got: {type(val)}")
+            if not tgt_dt.metadata or "vlen" not in tgt_dt.metadata:
+                raise TypeError(f"Expected tgt dtype to be vlen, but got: {tgt_dt}")
+            src_vlen_dt = src_dt.metadata["vlen"]
+            tgt_vlen_dt = tgt_dt.metadata["vlen"]
+            if has_reference(src_vlen_dt):
+                if len(val.shape) == 0:
+                    # scalar array
+                    e = val[()]
+                    v = self._copy_element(e, src_vlen_dt, tgt_vlen_dt, fin=fin)
+                    out = np.array(v, dtype=tgt_dt)
+                else:
+                    out = np.zeros(val.shape, dtype=tgt_dt)
+                    for i in range(len(out)):
+                        e = val[i]
+                        out[i] = self._copy_element(e, src_vlen_dt, tgt_vlen_dt, fin=fin)
+            else:
+                # can just directly copy the array
+                out = np.zeros(val.shape, dtype=tgt_dt)
+                out[...] = val[...]
+        else:
+            out = val  # can just copy as is
+        return out
+
+    def _copy_array(self, src_arr, fin=None):
+        """Copy the numpy array to a new array.
+            Convert any reference type to point to item in the target's hierarchy.
+        """
+
+        if not isinstance(src_arr, np.ndarray):
+            raise TypeError(f"Expecting ndarray, but got: {src_arr}")
+        tgt_dt = convert_dtype(src_arr.dtype, to_h5py=False)
+        tgt_arr = np.zeros(src_arr.shape, dtype=tgt_dt)
+
+        if has_reference(src_arr.dtype):
+            # flatten array to simplify iteration
+            count = int(np.prod(src_arr.shape))
+            tgt_arr_flat = tgt_arr.reshape((count,))
+            src_arr_flat = src_arr.reshape((count,))
+            for i in range(count):
+                e = src_arr_flat[i]
+                element = self._copy_element(e, src_arr.dtype, tgt_dt, fin=fin)
+                tgt_arr_flat[i] = element
+            tgt_arr = tgt_arr_flat.reshape(src_arr.shape)
+        else:
+            # can just copy the entire array
+            tgt_arr[...] = src_arr[...]
+        return tgt_arr
+
+    def visit(self, path, obj):
+        name = obj.__class__.__name__
+        self.log.info(f"visit: {path} name: {name}")
+
+        obj_id = createObjId(obj_type=name, root_id=self._root_id)  # create uuid
+
+        self._id_map[obj_id] = obj
+
+        addr = h5py.h5o.get_info(obj.id).addr
+        self._addr_map[addr] = obj_id
+
+    def __init__(
+        self,
+        filepath,
+        app_logger=None
+    ):
+        self._id_map = {}
+        self._addr_map = {}
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+        if not h5py.is_hdf5(filepath):
+            self.log.warn(f"File: {filepath} is not an HDF5 file")
+            raise IOError("not an HDF5 file")
+        super().__init__(filepath, app_logger=app_logger)
+        self._f = None
+        self._root_id = None
+
+    def open(self):
+        if self._f:
+            return  # already open
+        if self._id_map:
+            return  # objects already loaded
+        if not self._root_id:
+            # get the root id from db if available
+            if self.db.root_id:
+                self.log.info("H5pyReader: got root_id from db")
+                self._root_id = self.db.root_id
+            else:
+                self.log.info("H5pyReader: creating root id")
+                self._root_id = createObjId(obj_type="groups")
+
+        f = h5py.File(self.filepath)
+        self._f = f
+        self._id_map[self._root_id] = f
+        addr = h5py.h5o.get_info(f.id).addr
+        self._addr_map[addr] = self._root_id
+        f.visititems(self.visit)
+
+        return self._root_id
+
+    def close(self):
+        if self._f:
+            self._f.close()
+            self._f = None
+
+    def isClosed(self):
+        return False if self._f else True
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
+
+    def getObjIdByAddress(self, addr):
+        if addr in self._addr_map:
+            return self._addr_map[addr]
+        else:
+            return None
+
+    def getAttribute(self, obj_id, name, include_data=True):
+        """ Return JSON for the given attribute """
+
+        obj = self._id_map[obj_id]
+
+        if name not in obj.attrs:
+            msg = f"Attribute: [{name}] not found in object: {obj.name}"
+            self.log.info(msg)
+            return None
+
+        # get the attribute!
+        attrObj = h5py.h5a.open(obj.id, np.bytes_(name))
+
+        item = {}
+
+        # check if the dataset is using a committed type
+        typeid = attrObj.get_type()
+        type_item = None
+        if h5py.h5t.TypeID.committed(typeid):
+            type_uuid = None
+            addr = h5py.h5o.get_info(typeid).addr
+            type_uuid = self.getObjIdByAddress(addr)
+            committedType = self._id_map[type_uuid]
+            type_item = getTypeItem(committedType.dtype)
+            type_item["id"] = type_uuid
+        else:
+            type_item = getTypeItem(attrObj.dtype)
+        item["type"] = type_item
+
+        shape_item = {}
+        if attrObj.shape is None or attrObj.get_storage_size() == 0:
+            # If storage size is 0, assume this is a null space obj
+            # See: h5py issue https://github.com/h5py/h5py/issues/279
+            shape_item["class"] = "H5S_NULL"
+        else:
+            if attrObj.shape:
+                shape_item["class"] = "H5S_SIMPLE"
+                shape_item["dims"] = attrObj.shape
+            else:
+                shape_item["class"] = "H5S_SCALAR"
+
+        item["shape"] = shape_item
+        if shape_item["class"] == "H5S_NULL":
+            include_data = False
+        elif isinstance(type_item, dict) and type_item["class"] == "H5T_OPAQUE":
+            # TBD - don't include data for OPAQUE until JSON serialization
+            # issues are addressed
+            include_data = False
+        else:
+            pass  # use include_data parameter
+
+        if include_data:
+            try:
+                data = obj.attrs[name]
+                # convert from h5py to h5json
+                data = self._copy_array(data, fin=obj.file)
+            except TypeError:
+                self.log.warning("type error reading attribute")
+
+        if include_data and data is not None:
+            value = bytesArrayToList(data)
+            item["value"] = value
+        else:
+            pass  # no data
+
+        # timestamps will be added by getAttributeItem()
+        return item
+
+    def getAttributes(self, obj_id, include_data=True):
+        h5obj = self._id_map[obj_id]
+        self.log.info(f"getAttributes: {obj_id} include_data={include_data}")
+        items = {}  # with python 3.7+, this will maintain the attribute order we got from h5py
+        attrs = h5obj.attrs
+        for name in attrs:
+            item = self.getAttribute(obj_id, name, include_data=include_data)
+            items[name] = item
+
+        return items
+
+    def _getLink(self, parent, link_name):
+        if link_name not in parent:
+            return None
+
+        item = {"title": link_name}
+        # get the link object, one of HardLink, SoftLink, or ExternalLink
+        try:
+            linkObj = parent.get(link_name, None, False, True)
+            linkClass = linkObj.__class__.__name__
+        except TypeError:
+            # UDLink? set class as 'user'
+            linkClass = "UDLink"  # user defined links
+            item["class"] = "H5L_TYPE_USER_DEFINED"
+        if linkClass == "SoftLink":
+            item["class"] = "H5L_TYPE_SOFT"
+            item["h5path"] = linkObj.path
+        elif linkClass == "ExternalLink":
+            item["class"] = "H5L_TYPE_EXTERNAL"
+            item["h5path"] = linkObj.path
+            item["file"] = linkObj.filename
+        elif linkClass == "HardLink":
+            # Hardlink doesn't have any properties itself, just get the linked
+            # object
+            obj = parent[link_name]
+            addr = h5py.h5o.get_info(obj.id).addr
+            item["class"] = "H5L_TYPE_HARD"
+            if addr not in self._addr_map:
+                self.log.error(f"expected to find addr for link {link_name} in addr_map")
+                item["id"] = None
+            else:
+                item["id"] = self._addr_map[addr]
+
+        return item
+
+    def _getLinks(self, grp):
+        items = {}  # with python 3.7+, this will maintain the link order we got from h5py
+        for link_name in grp:
+            item = self._getLink(grp, link_name)
+            items[link_name] = item
+        return items
+
+    def _getGroup(self, grp, include_links=True):
+        self.log.info(f"_getGroup alias: [{grp.name}]")
+
+        item = {"alias": grp.name}
+
+        if include_links:
+            links = self._getLinks(grp)
+            item["links"] = links
+        return item
+
+    def _getDatatype(self, ctype, include_attrs=True):
+        self.log.info(f"getDatatype alias: ]{ctype.name}")
+        item = {"alias": ctype.name}
+        item["type"] = getTypeItem(ctype.dtype)
+
+        return item
+
+    def _getHDF5DatasetCreationProperties(self, dset, type_class):
+        """ Get dataset creation properties maintained by HDF5 library """
+
+        #
+        # Fill in creation properties
+        #
+        creationProps = {}
+        plist = h5py.h5d.DatasetID.get_create_plist(dset.id)
+
+        # alloc time
+        nAllocTime = plist.get_alloc_time()
+        if nAllocTime == h5py.h5d.ALLOC_TIME_DEFAULT:
+            creationProps["allocTime"] = "H5D_ALLOC_TIME_DEFAULT"
+        elif nAllocTime == h5py.h5d.ALLOC_TIME_LATE:
+            creationProps["allocTime"] = "H5D_ALLOC_TIME_LATE"
+        elif nAllocTime == h5py.h5d.ALLOC_TIME_EARLY:
+            creationProps["allocTime"] = "H5D_ALLOC_TIME_EARLY"
+        elif nAllocTime == h5py.h5d.ALLOC_TIME_INCR:
+            creationProps["allocTime"] = "H5D_ALLOC_TIME_INCR"
+        else:
+            self.log.warning(f"Unknown alloc time value: {nAllocTime}")
+
+        # fill time
+        nFillTime = plist.get_fill_time()
+        if nFillTime == h5py.h5d.FILL_TIME_ALLOC:
+            creationProps["fillTime"] = "H5D_FILL_TIME_ALLOC"
+        elif nFillTime == h5py.h5d.FILL_TIME_NEVER:
+            creationProps["fillTime"] = "H5D_FILL_TIME_NEVER"
+        elif nFillTime == h5py.h5d.FILL_TIME_IFSET:
+            creationProps["fillTime"] = "H5D_FILL_TIME_IFSET"
+        else:
+            self.log.warning(f"unknown fill time value: {nFillTime}")
+
+        if type_class == "H5T_OPAQUE":
+            # TBD: store opaque fill value as a hex string
+            self.log.warning("Opaque fill value not supported")
+        else:
+            if plist.fill_value_defined() == h5py.h5d.FILL_VALUE_USER_DEFINED:
+                creationProps["fillValue"] = bytesArrayToList(dset.fillvalue)
+
+        # layout
+        nLayout = plist.get_layout()
+        if nLayout == h5py.h5d.COMPACT:
+            creationProps["layout"] = {"class": "H5D_COMPACT"}
+        elif nLayout == h5py.h5d.CONTIGUOUS:
+            creationProps["layout"] = {"class": "H5D_CONTIGUOUS"}
+        elif nLayout == h5py.h5d.CHUNKED:
+            creationProps["layout"] = {"class": "H5D_CHUNKED", "dims": dset.chunks}
+        else:
+            self.log.warning(f"Unknown layout value: {nLayout}")
+
+        num_filters = plist.get_nfilters()
+        filter_props = []
+        if num_filters:
+            for n in range(num_filters):
+                filter_info = plist.get_filter(n)
+                opt_values = filter_info[2]
+                filter_prop = {}
+                filter_id = filter_info[0]
+                filter_prop["id"] = filter_id
+                if filter_info[3]:
+                    filter_prop["name"] = bytesArrayToList(filter_info[3])
+                if filter_id in filters._HDF_FILTERS:
+                    hdf_filter = filters._HDF_FILTERS[filter_id]
+                    filter_prop["class"] = hdf_filter["class"]
+                    if "options" in hdf_filter:
+                        filter_opts = hdf_filter["options"]
+                        for i in range(len(filter_opts)):
+                            if len(opt_values) <= i:
+                                break  # end of option values
+                            opt_value = opt_values[i]
+                            opt_value_enum = None
+                            option_name = filter_opts[i]
+                            if option_name in filters._HDF_FILTER_OPTION_ENUMS:
+                                option_enums = filters._HDF_FILTER_OPTION_ENUMS[option_name]
+                                if opt_value in option_enums:
+                                    opt_value_enum = option_enums[opt_value]
+                            if opt_value_enum:
+                                filter_prop[option_name] = opt_value_enum
+                            else:
+                                filter_prop[option_name] = opt_value
+                else:
+                    # custom filter
+                    filter_prop["class"] = "H5Z_FILTER_USER"
+                    if opt_values:
+                        filter_prop["parameters"] = opt_values
+                filter_props.append(filter_prop)
+            creationProps["filters"] = filter_props
+
+        return creationProps
+
+    def _getDataset(self, dset):
+        self.log.info(f"getDataset alias: [{dset.name}]")
+
+        item = {"alias": dset.name}
+
+        typeid = dset.id.get_type()
+        if h5py.h5t.TypeID.committed(typeid):
+            type_uuid = None
+            addr = h5py.h5o.get_info(typeid).addr
+            type_uuid = self.getObjIdByAddress(addr)
+            committedType = self.getObjectById(type_uuid)
+            type_item = committedType["type"]
+            type_item["id"] = type_uuid
+        else:
+            type_item = getTypeItem(dset.dtype)
+        item["type"] = type_item
+
+        shape_item = {}
+        if dset.shape is None:
+            # new with h5py 2.6, null space datasets will return None for shape
+            shape_item["class"] = "H5S_NULL"
+        elif len(dset.shape) == 0:
+            shape_item["class"] = "H5S_SCALAR"
+        else:
+            shape_item["class"] = "H5S_SIMPLE"
+            shape_item["dims"] = list(dset.shape)
+            maxshape = []
+            include_maxdims = False
+            for i in range(len(dset.shape)):
+                extent = 0
+                if len(dset.maxshape) > i:
+                    extent = dset.maxshape[i]
+                    if extent is None:
+                        extent = 0
+                    if extent > dset.shape[i] or extent == 0:
+                        include_maxdims = True
+                maxshape.append(extent)
+            if include_maxdims:
+                shape_item["maxdims"] = maxshape
+        item["shape"] = shape_item
+
+        item["cpl"] = self._getHDF5DatasetCreationProperties(dset, type_item["class"])
+
+        return item
+
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True):
+        """ return object with given id """
+        if obj_id not in self._id_map:
+            raise KeyError(f"{obj_id} not found")
+        h5obj = self._id_map[obj_id]
+        if isinstance(h5obj, h5py.Group):
+            obj_json = self._getGroup(h5obj, include_links=include_links)
+        elif isinstance(h5obj, h5py.Dataset):
+            obj_json = self._getDataset(h5obj)
+        elif isinstance(h5obj, h5py.Datatype):
+            obj_json = self._getDatatype(h5obj)
+        else:
+            raise TypeError(f"unexpected object type: {type(h5obj)}")
+
+        if include_attrs:
+            attributes = self.getAttributes(obj_id)
+            obj_json["attributes"] = attributes
+
+        return obj_json
+
+    def getDatasetValues(self, dset_id, sel=None, dtype=None):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+
+        dset = self._id_map[dset_id]
+        self.log.info(f"getDatasetValues: {dset_id}")
+        if dset.shape is None:
+            # TBD: return something like h5py.Empty in this case?
+            return None
+        if isOpaqueDtype(dset.dtype):
+            # TBD: Opaque data not supported yet
+            return None
+        if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
+            arr = dset[...]
+        elif isinstance(sel, selections.SimpleSelection):
+            arr = dset[sel.slices]
+        else:
+            raise NotImplementedError("selection type not supported")
+
+        # convert any h5py references to h5json references
+        arr = self._copy_array(arr, fin=dset.file)
+        return arr
diff --git a/src/h5json/h5pystore/h5py_writer.py b/src/h5json/h5pystore/h5py_writer.py
new file mode 100644
index 0000000..14942c1
--- /dev/null
+++ b/src/h5json/h5pystore/h5py_writer.py
@@ -0,0 +1,462 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import h5py
+import numpy as np
+import time
+
+from ..objid import getCollectionForId, isValidUuid, createObjId
+from ..hdf5dtype import createDataType
+from ..h5py_util import is_reference, is_regionreference, has_reference, convert_dtype
+from ..array_util import jsonToArray
+from .. import selections
+from .. import filters
+from ..h5writer import H5Writer
+
+
+class H5pyWriter(H5Writer):
+    """
+    This class saves state from the Hdf5Db class into an HDF5 file.
+    """
+
+    def __init__(
+        self,
+        filepath,
+        append=False,
+        no_data=False,
+        app_logger=None
+    ):
+        super().__init__(filepath, append=append, no_data=no_data, app_logger=app_logger)
+        self._id_map = {}
+        if append:
+            self._init = False
+        else:
+            self._init = True
+        self._flush_time = 0.0
+        self._f = None  # h5py file handle
+
+    def _copy_element(self, val, src_dt, tgt_dt, fout=None):
+        """ convert the given dataset or attribute element to h5py equivalent """
+        out = None
+        if len(src_dt) > 0:
+            out_fields = []
+            i = 0
+            for name in src_dt.fields:
+                field_src_dt = src_dt.fields[name][0]
+                field_tgt_dt = tgt_dt.fields[name][0]
+                field_val = val[i]
+                i += 1
+                out_field = self._copy_element(field_val, field_src_dt, field_tgt_dt)
+                out_fields.append(out_field)
+            out = tuple(out_fields)
+        elif src_dt.metadata and "ref" in src_dt.metadata:
+            if not tgt_dt.metadata or "ref" not in tgt_dt.metadata:
+                raise TypeError(f"Expected tgt dtype to be ref, but got: {tgt_dt}")
+            ref = tgt_dt.metadata["ref"]
+            if is_reference(ref):
+                # initialize out to null ref
+                out = h5py.Reference()  # null h5py ref
+
+                if ref and val:
+                    if isinstance(val, bytes):
+                        val = val.decode("ascii")
+                    # strip out collection prefix if present
+                    parts = val.split("/")
+                    obj_uuid = parts[-1]
+                    if not isValidUuid(obj_uuid):
+                        msg = f"invalid uuid: {obj_uuid}"
+                        self.log.warning(msg)
+                    elif obj_uuid not in self._id_map:
+                        self.log.warning(f"ref object {obj_uuid} not found")
+                    else:
+                        h5path = self._id_map[obj_uuid]
+                        try:
+                            obj = fout[h5path]
+                            out = obj.ref
+                        except KeyError:
+                            self.log.warning(f"referenced object: {h5path} not found")
+
+            elif is_regionreference(ref):
+                self.log.warning("region reference not supported")
+                # TBD: just return a null region reference till we have support
+                out = h5py.RegionReference()
+            else:
+                raise TypeError(f"Unexpected ref type: {type(ref)}")
+        elif src_dt.metadata and "vlen" in src_dt.metadata:
+            if not tgt_dt.metadata or "vlen" not in tgt_dt.metadata:
+                raise TypeError(f"Expected tgt dtype to be vlen, but got: {tgt_dt}")
+            src_vlen_dt = src_dt.metadata["vlen"]
+            tgt_vlen_dt = tgt_dt.metadata["vlen"]
+
+            if has_reference(src_vlen_dt):
+                if isinstance(val, np.ndarray) and val.shape == ():
+                    val = val[()]
+                if isinstance(val, np.ndarray) or isinstance(val, list) or isinstance(val, tuple):
+                    count = len(val)
+                    out = np.zeros((count,), dtype=tgt_dt)
+                    for i in range(count):
+                        e = val[i]
+                        out[i] = self._copy_element(e, src_vlen_dt, tgt_vlen_dt, fout=fout)
+                else:
+                    # scalar array
+                    v = self._copy_element(val, src_vlen_dt, tgt_vlen_dt, fout=fout)
+                    out = np.array(v, dtype=tgt_dt)
+            else:
+                # can just directly copy the array
+                out = np.zeros(val.shape, dtype=tgt_dt)
+                out[...] = val[...]
+        else:
+            out = val  # can just copy as is
+        return out
+
+    def _copy_array(self, src_arr, fout=None):
+        """Copy the numpy array to a new array.
+            Convert any reference type to point to item in the target's hierarchy.
+        """
+        if not isinstance(src_arr, np.ndarray):
+            raise TypeError(f"Expecting ndarray, but got: {src_arr}")
+        tgt_dt = convert_dtype(src_arr.dtype, to_h5py=True)
+        tgt_arr = np.zeros(src_arr.shape, dtype=tgt_dt)
+
+        if has_reference(src_arr.dtype):
+            # flatten array to simplify iteration
+            count = int(np.prod(src_arr.shape))
+            tgt_arr_flat = tgt_arr.reshape((count,))
+            src_arr_flat = src_arr.reshape((count,))
+            for i in range(count):
+                e = src_arr_flat[i]
+                element = self._copy_element(e, src_arr.dtype, tgt_dt, fout=fout)
+                tgt_arr_flat[i] = element
+            tgt_arr = tgt_arr_flat.reshape(src_arr.shape)
+        else:
+            # can just copy the entire array
+            tgt_arr[...] = src_arr[...]
+        return tgt_arr
+
+    def _createGroup(self, parent, grp_json, name=None):
+        """ create the group and any links it contains """
+        grp = parent.create_group(name)
+        return grp
+
+    def _createDataset(self, parent, dset_json, name=None):
+        """ create a dataset object """
+
+        dtype = self.db.getDtype(dset_json)
+
+        kwargs = {"dtype": dtype}
+        shape_json = dset_json["shape"]
+        shape_class = shape_json["class"]
+        if shape_class == "H5S_NULL":
+            # skip the shape keyword to create a null space dataset
+            pass
+        elif shape_class == "H5S_SCALAR":
+            kwargs["shape"] = ()
+        else:
+            kwargs["shape"] = shape_json["dims"]
+        if "dcpl" in dset_json and shape_class != "H5S_NULL":
+            creation_props = dset_json["dcpl"]
+            if "fillValue" in creation_props:
+                fillvalue = creation_props["fillValue"]
+                if fillvalue and len(dtype) > 1 and type(fillvalue) in (list, tuple):
+                    # for compound types, need to convert from list to dataset compatible element
+
+                    if len(dtype) != len(fillvalue):
+                        msg = "fillvalue has incorrect number of elements"
+                        self.log.warning(msg)
+                        raise ValueError(msg)
+
+                    fillvalue = jsonToArray((), dtype, fillvalue)
+
+                kwargs["fillvalue"] = fillvalue
+
+            if "trackTimes" in creation_props:
+                kwargs["track_times"] = creation_props["trackTimes"]
+            if "layout" in creation_props:
+                layout = creation_props["layout"]
+                if "dims" in layout:
+                    kwargs["chunks"] = tuple(layout["dims"])
+            if "filters" in creation_props:
+                filter_props = creation_props["filters"]
+                for filter_prop in filter_props:
+                    if "id" not in filter_prop:
+                        self.log.warning("filter id not provided")
+                        continue
+                    filter_id = filter_prop["id"]
+                    if filter_id not in filters._HDF_FILTERS:
+                        self.log.warning(f"unknown filter id: {filter_id} ignoring")
+                        continue
+
+                    hdf_filter = filters._HDF_FILTERS[filter_id]
+
+                    self.log.info(f"got filter: {filter_id}")
+                    if "alias" not in hdf_filter:
+                        self.log.warning(f"unsupported filter id: {filter_id} ignoring")
+                        continue
+
+                    filter_alias = hdf_filter["alias"]
+                    if not h5py.h5z.filter_avail(filter_id):
+                        msg = "compression filter not available, filter: {filter_alias}, ignoring"
+                        self.log.warning(msg)
+                        continue
+                    if filter_alias in filters._H5PY_COMPRESSION_FILTERS:
+                        if kwargs.get("compression"):
+                            msg = f"compression filter already set for {filter_alias}, ignoring"
+                            self.log.info(msg)
+                            continue
+
+                        kwargs["compression"] = filter_alias
+                        self.log.info("setting compression filter to: {filter_alias}")
+                        if filter_alias == "gzip":
+                            # check for an optional compression value
+                            if "level" in filter_prop:
+                                kwargs["compression_opts"] = filter_prop["level"]
+                        elif filter_alias == "szip":
+                            bitsPerPixel = None
+                            coding = "nn"
+
+                            if "bitsPerPixel" in filter_prop:
+                                bitsPerPixel = filter_prop["bitsPerPixel"]
+                            if "coding" in filter_prop:
+                                if filter_prop["coding"] == "H5_SZIP_EC_OPTION_MASK":
+                                    coding = "ec"
+                                elif filter_prop["coding"] == "H5_SZIP_NN_OPTION_MASK":
+                                    coding = "nn"
+                                else:
+                                    self.log.warning("invalid szip option: 'coding'")
+                            # note: pixelsPerBlock, and pixelsPerScanline not supported by h5py,
+                            # so these options will be ignored
+                            if "pixelsPerBlock" in filter_props:
+                                self.log.info("ignoring szip option: 'pixelsPerBlock'")
+                            if "pixelsPerScanline" in filter_props:
+                                self.log.info("ignoring szip option: 'pixelsPerScanline'")
+                            if bitsPerPixel:
+                                kwargs["compression_opts"] = (coding, bitsPerPixel)
+                    else:
+                        if filter_alias == "shuffle":
+                            kwargs["shuffle"] = True
+                        elif filter_alias == "fletcher32":
+                            kwargs["fletcher32"] = True
+                        elif filter_alias == "scaleoffset":
+                            if "scaleOffset" not in filter_prop:
+                                msg = "No scale_offset provided for scale offset filter, ignoring"
+                                self.log(msg)
+                                continue
+                            kwargs["scaleoffset"] = filter_prop["scaleOffset"]
+                        else:
+                            self.log.info(f"Unexpected filter name: {filter_alias}, ignoring")
+
+        dset = parent.create_dataset(name, **kwargs)
+        return dset
+
+    def _createDatatype(self, parent, ctype_json, name=None):
+        """ create a datatype object """
+
+        type_item = ctype_json["type"]
+        dtype = createDataType(type_item)
+        parent[name] = dtype
+        return parent[name]
+
+    def _createObjects(self, parent, links_json, visited=set()):
+        """ create child object in the given group, recurse for any sub-groups """
+
+        for title in links_json:
+            link_json = links_json[title]
+            link_class = link_json["class"]
+            if link_class == "H5L_TYPE_SOFT" and title not in parent:
+                h5path = link_json["h5path"]
+                parent[title] = h5py.SoftLink(h5path)
+            elif link_class == "H5L_TYPE_EXTERNAL" and title not in parent:
+                h5path = link_json["h5path"]
+                filename = link_json["file"]
+                parent[title] = h5py.ExternalLink(filename, h5path)
+            elif link_class == "H5L_TYPE_USER_DEFINED" and title not in parent:
+                self.log.warning("unable to create user-defined link: {title}")
+            elif link_class == "H5L_TYPE_HARD":
+                tgt_id = link_json["id"]
+
+                collection = getCollectionForId(tgt_id)
+
+                obj_json = self.db.getObjectById(tgt_id)
+
+                if tgt_id in self._id_map:
+                    # object has already been created
+                    tgt_path = self._id_map[tgt_id]
+                    tgt_obj = parent[tgt_path]
+                    if title not in parent:
+                        parent[title] = tgt_obj
+                    if collection == "groups" and tgt_id not in visited:
+                        # recurse over sub-objects to pick up any new links
+                        grp_links = obj_json["links"]
+                        visited.add(tgt_id)
+                        self._createObjects(tgt_obj, grp_links, visited=visited)
+                else:
+                    # need to create tgt_id object
+                    parent_path = parent.name
+                    if parent_path[-1] != '/':
+                        parent_path += '/'
+                    self._id_map[tgt_id] = parent_path + title
+                    kwds = {"name": title}
+                    if collection == "groups":
+                        tgt_grp = self._createGroup(parent, obj_json, **kwds)
+                        if "links" in obj_json:
+                            grp_links = obj_json["links"]
+                            visited.add(tgt_id)
+                            self._createObjects(tgt_grp, grp_links, visited=visited)
+                    elif collection == "datasets":
+                        self._createDataset(parent, obj_json, **kwds)
+                    elif collection == "datatypes":
+                        self._createDatatype(parent, obj_json, **kwds)
+                    else:
+                        self.log.warning(f"unexpected collection: {collection}")
+                visited.add(tgt_id)
+
+            else:
+                self.log.warning(f"unexpected link class: {link_class}")
+
+    def updateDatasetValues(self, dset_id, dset):
+        """ write any pending dataset values """
+        dset_json = self.db.getObjectById(dset_id)
+        if "updates" not in dset_json:
+            return
+        updates = dset_json["updates"]
+        for (sel, val) in updates:
+            slices = []
+            for dim in range(len(sel.shape)):
+                start = sel.start[dim]
+                stop = start + sel.count[dim]
+                step = sel.step[dim]
+                slices.append(slice(start, stop, step))
+            slices = tuple(slices)
+            dset[slices] = val
+            self.log.debug(f"h5py_writer dset {dset.name} updated")
+
+    def initializeDatasetValues(self, dset_id, dset):
+        """ write all dataset values """
+
+        if dset.shape is None:
+            return  # null space dataset
+
+        sel_all = selections.select(dset.shape, ...)
+        arr = self.db.getDatasetValues(dset_id, sel_all)
+        if arr is not None:
+            dset[...] = arr
+
+    def createAttribute(self, obj, name, attr_json):
+        """ add the given attribute to obj """
+
+        src_dt = self.db.getDtype(attr_json)
+
+        # handle special case of null space attribute here
+        shape_json = attr_json["shape"]
+        shape_class = shape_json["class"]
+        if shape_class == "H5S_NULL":
+            obj.attrs[name] = h5py.Empty(convert_dtype(src_dt, to_h5py=True))
+            return
+
+        if shape_class == "H5S_SCALAR":
+            dims = ()
+        else:
+            dims = shape_json["dims"]
+        src_arr = jsonToArray(dims, src_dt, attr_json["value"])
+        if not isinstance(src_arr, np.ndarray):
+            raise TypeError("Unexpected type for src_arr")
+        tgt_arr = self._copy_array(src_arr, fout=obj.file)
+        obj.attrs[name] = tgt_arr
+
+    def updateAttributes(self, obj_id, obj):
+        """ create/replace any modified attributes """
+
+        obj_json = self.db.getObjectById(obj_id)
+
+        if "attributes" not in obj_json:
+            # no attributes
+            return
+
+        attrs = obj_json["attributes"]
+        for name in attrs:
+            attr_json = attrs[name]
+            if "created" in attr_json and attr_json["created"] < self._flush_time:
+                # attribute should be saved already
+                continue
+            self.createAttribute(obj, name, attr_json)
+
+    def flush(self):
+        """ Write dirty items """
+        if self.closed:
+            # no db set yet
+            self.log.warning("h5py_writer - flush called but no db")
+            return False
+        if not self._f:
+            self.log.warning("h5py_writer file not open")
+            raise IOError("open not called")
+
+        self.log.info("h5py_writer.flush()")
+
+        root_id = self.db.root_id
+        self._id_map[root_id] = "/"
+
+        if self.db.new_objects or self._init:
+            root_json = self.db.getObjectById(root_id)
+
+            if "links" in root_json:
+                root_links = root_json["links"]
+                self._createObjects(self._f, root_links, visited=set((root_id,)))
+
+        # update attributes, dataset values
+        for obj_id in self._id_map:
+            if self.db.is_dirty(obj_id) or self._init:
+                h5path = self._id_map[obj_id]
+                obj = self._f[h5path]
+                self.updateAttributes(obj_id, obj)
+                collection = getCollectionForId(obj_id)
+                if collection == "datasets" and not self.no_data:
+                    if self._init:
+                        self.initializeDatasetValues(obj_id, obj)
+                    else:
+                        self.updateDatasetValues(obj_id, obj)
+        # mark time write is complete
+        # updates before this time will not need to be written
+        # TBD: possible race condition with multithreading
+        self._flush_time = time.time()
+
+        self._init = False  # done with init after first flush
+        return True  # all objects written successfully
+
+    def open(self):
+        """ open HDF5 file """
+        self.log.debug("h5pyWriter open")
+        if self.db is None:
+            # no db set yet
+            self.log.warning("no self.db db_ref")
+            raise ValueError("no db")
+        mode = 'a' if self._append else 'w'
+        self.log.info(f"creating h5py file: {self._filepath} mode: {mode}")
+        self._f = h5py.File(self._filepath, mode=mode)
+        self._append = True  # switch to append mode for next file open
+        if self.db.root_id:
+            self._root_id = self.db.root_id
+        else:
+            self._root_id = createObjId(obj_type="groups")
+        return self._root_id
+
+    def close(self):
+        """ close storage handle """
+        self.log.debug("h5py_writer.close()")
+        if not self._f:
+            # no open on file
+            return
+        self.flush()
+        self._f.close()
+        self._f = None
+
+    def isClosed(self):
+        """ return closed status """
+        return False if self._f else True
diff --git a/src/h5json/h5reader.py b/src/h5json/h5reader.py
new file mode 100644
index 0000000..3bf49ca
--- /dev/null
+++ b/src/h5json/h5reader.py
@@ -0,0 +1,94 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+from abc import ABC, abstractmethod
+import weakref
+
+import logging
+
+
+class H5Reader(ABC):
+    """
+    This abstract class defines properties and methods that the Hdf5db class uses for reading from an HDF5
+    compatible storage medium.
+    """
+
+    def __init__(
+        self,
+        filepath,
+        app_logger=None
+    ):
+        self._filepath = filepath
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+    def set_db(self, db):
+        self._db_ref = weakref.ref(db)
+
+    @property
+    def db(self):
+        if not self._db_ref:
+            raise ValueError("db not available")
+        return self._db_ref()
+
+    @property
+    def filepath(self):
+        """ return filepath """
+        return self._filepath
+
+    @property
+    def closed(self):
+        """ return True if the reader handle is closed (or never opened) """
+        return self.isClosed()
+
+    @abstractmethod
+    def get_root_id(self):
+        """ Return root id """
+        pass
+
+    @abstractmethod
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True):
+        """ return object with given id """
+        pass
+
+    @abstractmethod
+    def getAttribute(self, obj_id, name, includeData=True):
+        """
+        Get attribute given an object id and name
+        returns: JSON object
+        """
+        pass
+
+    @abstractmethod
+    def getDatasetValues(self, obj_id, sel=None, dtype=None):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+        pass
+
+    @abstractmethod
+    def open(self):
+        """ Open data source for reading """
+        pass
+
+    @abstractmethod
+    def close(self):
+        """ close any open handles to the storage """
+        pass
+
+    @abstractmethod
+    def isClosed(self):
+        """ return True if handle is closed """
+        pass
diff --git a/src/h5json/h5tojson/h5tojson.py b/src/h5json/h5tojson/h5tojson.py
index 89a65bd..284de84 100755
--- a/src/h5json/h5tojson/h5tojson.py
+++ b/src/h5json/h5tojson/h5tojson.py
@@ -10,235 +10,44 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import sys
-import json
-import argparse
 import os.path as op
-import tempfile
 import logging
-import logging.handlers
-from h5json import Hdf5db
-from h5json import hdf5dtype
-
-
-class DumpJson:
-    """
-    DumpJson - return json representation of all objects within the given file
-    """
-
-    def __init__(self, db, app_logger=None, options=None):
-        self.options = options
-        self.db = db
-        if app_logger:
-            self.log = app_logger
-        else:
-            self.log = logging.getLogger()
-        self.json = {}
-
-    def dumpAttribute(self, col_name, uuid, attr_name):
-        self.log.info("dumpAttribute: [" + attr_name + "]")
-        item = self.db.getAttributeItem(col_name, uuid, attr_name)
-        response = {"name": attr_name}
-        typeItem = item["type"]
-        response["type"] = hdf5dtype.getTypeResponse(typeItem)
-        response["shape"] = item["shape"]
-        if not self.options.D:
-            if "value" not in item:
-                self.log.warning("no value key in attribute: " + attr_name)
-            else:
-                response["value"] = item[
-                    "value"
-                ]  # dump values unless header -D was passed
-        return response
-
-    def dumpAttributes(self, col_name, uuid):
-        attr_list = self.db.getAttributeItems(col_name, uuid)
-        self.log.info("dumpAttributes: " + uuid)
-        items = []
-        for attr in attr_list:
-            item = self.dumpAttribute(col_name, uuid, attr["name"])
-            items.append(item)
-
-        return items
-
-    def dumpLink(self, uuid, name):
-        item = self.db.getLinkItemByUuid(uuid, name)
-        for key in ("ctime", "mtime", "href"):
-            if key in item:
-                del item[key]
-        return item
-
-    def dumpLinks(self, uuid):
-        link_list = self.db.getLinkItems(uuid)
-        items = []
-        for link in link_list:
-            item = self.dumpLink(uuid, link["title"])
-            items.append(item)
-        return items
-
-    def dumpGroup(self, uuid):
-        item = self.db.getGroupItemByUuid(uuid)
-        if "alias" in item:
-            alias = item["alias"]
-            if alias:
-                self.log.info("dumpGroup alias: [" + alias[0] + "]")
-        for key in ("ctime", "mtime", "linkCount", "attributeCount", "id"):
-            if key in item:
-                del item[key]
-        attributes = self.dumpAttributes("groups", uuid)
-        if attributes:
-            item["attributes"] = attributes
-        links = self.dumpLinks(uuid)
-        if links:
-            item["links"] = links
-        return item
-
-    def dumpGroups(self):
-        groups = {}
-        item = self.dumpGroup(self.root_uuid)
-        groups[self.root_uuid] = item
-        uuids = self.db.getCollection("groups")
-        for uuid in uuids:
-            item = self.dumpGroup(uuid)
-            groups[uuid] = item
-
-        self.json["groups"] = groups
-
-    def dumpDataset(self, uuid):
-        response = {}
-        self.log.info("dumpDataset: " + uuid)
-        item = self.db.getDatasetItemByUuid(uuid)
-        if "alias" in item:
-            alias = item["alias"]
-            if alias:
-                self.log.info("dumpDataset alias: [" + alias[0] + "]")
-            response["alias"] = item["alias"]
-
-        typeItem = item["type"]
-        response["type"] = hdf5dtype.getTypeResponse(typeItem)
-        shapeItem = item["shape"]
-        shape_rsp = {}
-        num_elements = 1
-        shape_rsp["class"] = shapeItem["class"]
-        if "dims" in shapeItem:
-            shape_rsp["dims"] = shapeItem["dims"]
-            for dim in shapeItem["dims"]:
-                num_elements *= dim
-        if "maxdims" in shapeItem:
-            maxdims = []
-            for dim in shapeItem["maxdims"]:
-                if dim == 0:
-                    maxdims.append("H5S_UNLIMITED")
-                else:
-                    maxdims.append(dim)
-            shape_rsp["maxdims"] = maxdims
-        response["shape"] = shape_rsp
-
-        if "creationProperties" in item:
-            response["creationProperties"] = item["creationProperties"]
-
-        attributes = self.dumpAttributes("datasets", uuid)
-        if attributes:
-            response["attributes"] = attributes
-
-        if not (self.options.D or self.options.d):
-            if num_elements > 0:
-                value = self.db.getDatasetValuesByUuid(uuid)
-                response["value"] = value  # dump values unless header flag was passed
-            else:
-                response["value"] = []  # empty list
-        return response
 
-    def dumpDatasets(self):
-        uuids = self.db.getCollection("datasets")
-        if uuids:
-            datasets = {}
-            for uuid in uuids:
-                item = self.dumpDataset(uuid)
-                datasets[uuid] = item
-
-            self.json["datasets"] = datasets
-
-    def dumpDatatype(self, uuid):
-        response = {}
-        item = self.db.getCommittedTypeItemByUuid(uuid)
-        response["alias"] = item["alias"]
-        typeItem = item["type"]
-        response["type"] = hdf5dtype.getTypeResponse(typeItem)
-        attributes = self.dumpAttributes("datatypes", uuid)
-        if attributes:
-            response["attributes"] = attributes
-        return response
-
-    def dumpDatatypes(self):
-        uuids = self.db.getCollection("datatypes")
-        if uuids:
-            datatypes = {}
-            for uuid in uuids:
-                item = self.dumpDatatype(uuid)
-                datatypes[uuid] = item
-
-            self.json["datatypes"] = datatypes
-
-    def dumpFile(self):
-
-        self.root_uuid = self.db.getUUIDByPath("/")
-
-        db_version_info = self.db.getVersionInfo()
-
-        self.json["apiVersion"] = db_version_info["hdf5-json-version"]
-        self.json["root"] = self.root_uuid
-
-        self.dumpGroups()
-
-        self.dumpDatasets()
-
-        self.dumpDatatypes()
-
-        print(json.dumps(self.json, sort_keys=True, indent=4))
-
-
-def getTempFileName():
-    """
-    Generate a temporary filename to avoid problems with trying to create a dbfile
-    in a read-only directory.  (See: https://github.com/HDFGroup/h5serv/issues/37)
-    """
-    f = tempfile.NamedTemporaryFile(delete=False)
-    f.close()
-    return f.name
+from h5json import Hdf5db
+from h5json.jsonstore.h5json_writer import H5JsonWriter
+from h5json.h5pystore.h5py_reader import H5pyReader
 
 
 def main():
-    parser = argparse.ArgumentParser(usage="%(prog)s [-h] [-D|-d] <hdf5_file>")
-    parser.add_argument("-D", action="store_true", help="surpress all data output")
-    parser.add_argument(
-        "-d",
-        action="store_true",
-        help="surpress data output for" + " datasets (but not attribute values)",
-    )
-    parser.add_argument("filename", nargs="+", help="HDF5 to be converted to json")
-    args = parser.parse_args()
+    if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
+        print(f"usage: {sys.argv[0]} [-h] [--nodata] <hdf5_file>")
+        sys.exit(0)
+
+    no_data = False
+    filename = None
+    for i in range(1, len(sys.argv)):
+        if sys.argv[i] == "--nodata":
+            no_data = True
+        else:
+            filename = sys.argv[i]
 
     # create logger
-    log = logging.getLogger("h5serv")
-    # log.setLevel(logging.WARN)
-    log.setLevel(logging.INFO)
-    # add log handler
-    handler = logging.FileHandler("./h5tojson.log")
-
-    # add handler to logger
-    log.addHandler(handler)
+    logfname = "h5tojson.log"
+    loglevel = logging.DEBUG
+    logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
+    log = logging.getLogger()
 
-    filename = args.filename[0]
+    # check that the input file exists
     if not op.isfile(filename):
-        sys.exit("Cannot find file: %s" % filename)
+        sys.exit(f"Cannot find file: {filename}")
 
-    log.info("h5tojson " + filename)
+    log.info(f"h5tojson {filename}")
 
-    dbFilename = getTempFileName()
-    log.info("Using dbFile: " + dbFilename)
-    with Hdf5db(filename, dbFilePath=dbFilename, readonly=True, app_logger=log) as db:
-        dumper = DumpJson(db, app_logger=log, options=args)
-        dumper.dumpFile()
+    db = Hdf5db(app_logger=log)
+    db.reader = H5pyReader(filename, app_logger=log)
+    db.writer = H5JsonWriter(None, no_data=no_data, app_logger=log)
+    db.open()  # read HDF5 data into db
+    db.close()  # close will trigger write to json file
 
 
 if __name__ == "__main__":
diff --git a/src/h5json/h5writer.py b/src/h5json/h5writer.py
new file mode 100644
index 0000000..3dfb8da
--- /dev/null
+++ b/src/h5json/h5writer.py
@@ -0,0 +1,85 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+from abc import ABC, abstractmethod
+import weakref
+import logging
+
+
+class H5Writer(ABC):
+    """
+    This abstract class defines properties and methods that the Hdf5db class uses for writing to an HDF5
+    compatible storage medium.
+    """
+
+    def __init__(
+        self,
+        filepath,
+        append=False,
+        no_data=False,
+        app_logger=None
+    ):
+        self._filepath = filepath
+        self._append = append
+        self._no_data = no_data
+        self._filepath = filepath
+        self._db_ref = None
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+    def set_db(self, db):
+        self._db_ref = weakref.ref(db)
+        self.log.debug("writer set db ref")
+
+    @property
+    def filepath(self):
+        return self._filepath
+
+    @property
+    def closed(self):
+        return self.isClosed()
+
+    @property
+    def db(self):
+        if not self._db_ref:
+            self.log.debug("db not available")
+            return None
+        return self._db_ref()
+
+    @property
+    def append(self):
+        return self._append
+
+    @property
+    def no_data(self):
+        return self._no_data
+
+    @abstractmethod
+    def open(self):
+        """ open storage handle, return root_id"""
+        return None
+
+    @abstractmethod
+    def flush(self):
+        """ Write dirty items """
+        pass
+
+    @abstractmethod
+    def close(self):
+        """ close storage handle """
+        pass
+
+    @abstractmethod
+    def isClosed(self):
+        """ return True if handle is closed """
+        pass
diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py
index 27f2094..581399f 100644
--- a/src/h5json/hdf5db.py
+++ b/src/h5json/hdf5db.py
@@ -9,3523 +9,782 @@
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
-import errno
 import time
-import h5py
 import numpy as np
-import uuid
-import os.path as op
-import os
-import json
 import logging
-from .hdf5dtype import getTypeItem, createDataType, getItemSize
+from .hdf5dtype import getTypeItem, createDataType, Reference, special_dtype
+from .array_util import jsonToArray, bytesArrayToList
+from .dset_util import resize_dataset
+from .objid import createObjId, getCollectionForId, isValidUuid, getUuidFromId
+from . import selections
 from .apiversion import _apiver
-
-
-# global dictionary to direct back to the Hdf5db instance by filename
-# (needed for visititems callback)
-# Will break in multi-threaded context
-_db = {}
-
-UUID_LEN = 36  # length for uuid strings
-
-# standard compress filters
-_HDF_FILTERS = {
-    1: {"class": "H5Z_FILTER_DEFLATE", "alias": "gzip", "options": ["level"]},
-    2: {"class": "H5Z_FILTER_SHUFFLE", "alias": "shuffle"},
-    3: {"class": "H5Z_FILTER_FLETCHER32", "alias": "fletcher32"},
-    4: {
-        "class": "H5Z_FILTER_SZIP",
-        "alias": "szip",
-        "options": ["bitsPerPixel", "coding", "pixelsPerBlock", "pixelsPerScanLine"],
-    },
-    5: {"class": "H5Z_FILTER_NBIT"},
-    6: {
-        "class": "H5Z_FILTER_SCALEOFFSET",
-        "alias": "scaleoffset",
-        "options": ["scaleType", "scaleOffset"],
-    },
-    32000: {"class": "H5Z_FILTER_LZF", "alias": "lzf"},
-}
-
-_HDF_FILTER_OPTION_ENUMS = {
-    "coding": {
-        h5py.h5z.SZIP_EC_OPTION_MASK: "H5_SZIP_EC_OPTION_MASK",
-        h5py.h5z.SZIP_NN_OPTION_MASK: "H5_SZIP_NN_OPTION_MASK",
-    },
-    "scaleType": {
-        h5py.h5z.SO_FLOAT_DSCALE: "H5Z_SO_FLOAT_DSCALE",
-        h5py.h5z.SO_FLOAT_ESCALE: "H5Z_SO_FLOAT_ESCALE",
-        h5py.h5z.SO_INT: "H5Z_SO_INT",
-    },
-}
-
-# h5py supported filters
-_H5PY_FILTERS = {
-    "gzip": 1,
-    "shuffle": 2,
-    "fletcher32": 3,
-    "szip": 4,
-    "scaleoffset": 6,
-    "lzf": 32000,
-}
-
-_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
-
-
-def visitObj(path, obj):
-    hdf5db = _db[obj.file.filename]
-    hdf5db.visit(path, obj)
+from .h5reader import H5Reader
+from .h5writer import H5Writer
 
 
 class Hdf5db:
     """
-    This class is used to manage UUID lookup tables for primary HDF objects (Groups, Datasets,
-    and Datatypes).  For HDF5 files that are read/write, this information is managed within
-    the file itself in the "__db__" group.  For read-only files, the data is managed in
-    an external file (domain filename with ".db" extension).
-
-    "___db__"  ("root" for read-only case)
-        description: Group object (member of root group). Only objects below this group are used
-                for UUID data
-        members: "{groups}", "{datasets}", "{datatypes}", "{objects}", "{paths}"
-        attrs: 'rootUUID': UUID of the root group
-
-    "{groups}"
-        description: contains map of UUID->group objects
-        members: hard link to each anonymous group (i.e. groups which are not
-            linked to by anywhere else).  Link name is the UUID
-        attrs: group reference (or path for read-only files) to the group (for non-
-            anonymous groups).
-
-    "{datasets}"
-        description: contains map of UUID->dataset objects
-        members: hard link to each anonymous dataset (i.e. datasets which are not
-            linked to by anywhere else).  Link name is the UUID
-        attrs: dataset reference (or path for read-only files) to the dataset (for non-
-            anonymous datasets).
-
-    "{dataset_props}:
-        description contains dataset creation properties"
-        members: sub-group with link name as UUID.  Sub-group attributes are the creation props
-
-    "{datatypes}"
-        description: contains map of UUID->datatyped objects
-        members: hard link to each anonymous datatype (i.e. datatypes which are not
-            linked to by anywhere else).  Link name is the UUID
-        attrs: datatype reference (or path for read-only files) to the datatype (for non-
-            anonymous datatypes).
-
-    "{addr}"
-        description: contains map of file offset to UUID.
-        members: none
-        attrs: map of file offset to UUID
+    This class is used to manage id lookup tables for primary HDF objects (Groups, Datasets,
+    and Datatypes).  By default all data is held in-memory.  Initialize with h5_reader to read from
+    an HDF5 compatible storage pool, and or, h5_writer to write to an HDF5 compatible storage pool.
     """
 
-    @staticmethod
-    def createHDF5File(filePath):
-        # create an "empty" hdf5 file
-        # if op.isfile(filePath):
-        #     raise IOError(errno.EEXIST, "Resource already exists")
-
-        f = h5py.File(filePath, "w")
-        f.close()
-
     @staticmethod
     def getVersionInfo():
         versionInfo = {}
         versionInfo["hdf5-json-version"] = _apiver
-        versionInfo["h5py_version"] = h5py.version.version
-        versionInfo["hdf5_version"] = h5py.version.hdf5_version
         return versionInfo
 
     def __init__(
         self,
-        filePath,
-        dbFilePath=None,
-        readonly=False,
+        h5_reader: H5Reader = None,
+        h5_writer: H5Writer = None,
         app_logger=None,
-        root_uuid=None,
-        update_timestamps=True,
-        userid=None,
     ):
         if app_logger:
             self.log = app_logger
         else:
             self.log = logging.getLogger()
-        if len(filePath) == 0 or not op.isfile(filePath):
-            raise IOError(errno.ENXIO, "file not found")
-        if not h5py.is_hdf5(filePath):
-            raise IOError(errno.EINVAL, "not an HDF5 file")
-
-        mode = "r"
-        if readonly:
-            self.readonly = True
-        else:
-            if not os.stat(filePath).st_mode & 0o200:
-                # file is read-only
-                self.readonly = True
+
+        self._db = {}
+
+        self._new_objects = set()  # set of for newly created objects
+        self._dirty_objects = set()  # set of modified objects
+        self._deleted_objects = set()  # set of deleted objects
+
+        self._root_id = None
+
+        if h5_reader:
+            self._reader = h5_reader
+            self._reader.set_db(self)
+        else:
+            self._reader = None
+
+        if h5_writer:
+            self._writer = h5_writer
+            self._writer.set_db(self)
+        else:
+            self._writer = None
+
+    @property
+    def db(self):
+        """ return object db dictionary """
+        return self._db
+
+    @property
+    def reader(self):
+        """ return reader instance """
+        return self._reader
+
+    @reader.setter
+    def reader(self, value: H5Reader):
+        """ set the reader """
+        if self._writer:
+            self.flush()
+        if self._reader:
+            self._reader.close()
+        self._reader = value
+        self._reader.set_db(self)
+        """
+        root_id = value.get_root_id()
+        if not root_id:
+            raise ValueError(f"reader {type(value)} unable to return root_id")
+        group_json = value.getObjectById(root_id)
+        if not group_json:
+            raise ValueError(f"reader {type(value)} unable to return group json")
+        self._reader = value
+        self._db[root_id] = group_json
+        self._root_id = root_id
+        """
+
+    @property
+    def writer(self):
+        """ return writer instance """
+        return self._writer
+
+    @writer.setter
+    def writer(self, value: H5Writer):
+        """ set the writer """
+        if self._writer:
+            self._writer.close()
+        self._writer = value
+        if self._writer:
+            self.log.debug("writer set_db")
+            self._writer.set_db(self)
+
+    @property
+    def root_id(self):
+        """ return root uuid """
+        return self._root_id
+
+    def is_new(self, obj_id):
+        """ return true if this is a new object (has not been persisted) """
+        return obj_id in self._new_objects
+
+    def is_dirty(self, obj_id):
+        """ return true if this object has been modified """
+        if self.is_new(obj_id):
+            return True
+        return obj_id in self._dirty_objects
+
+    @property
+    def new_objects(self):
+        return self._new_objects
+
+    @property
+    def dirty_objects(self):
+        return self._dirty_objects
+
+    @property
+    def deleted_objects(self):
+        return self._deleted_objects
+
+    def make_dirty(self, obj_id):
+        """ Mark the object as dirty and update the lastModified timestamp """
+        if self.is_new(obj_id):
+            # object hasn't been initially written yet, just return
+            return
+        if obj_id not in self.db:
+            self.log.error("make dirty called on deleted object")
+            raise KeyError(f"obj_id: {obj_id} not found")
+        if self.db[obj_id] is None:
+            # object deleted, just return
+            return
+        obj_json = self.db[obj_id]
+        obj_json["lastModified"] = time.time()
+        self._dirty_objects.add(obj_id)
+
+    def flush(self):
+        """ write out any changes """
+        self.log.debug("db.flush()")
+        if not self.writer:
+            return  # nothing to do
+        if not self.writer.flush():
+            # flush not successful, don't clear dirty set
+            return
+
+        # reset new and dirty sets
+        self._new_objects = set()
+        self._dirty_objects = set()
+
+    def open(self):
+        """ open reader and writer if set """
+        self.log.debug("db.open()")
+        if self.root_id:
+            self.log.debug("root id already set, re-open call")
+            if self.writer:
+                self.writer.open()
+            if self.reader:
+                self.reader.open()
+        else:
+            self.log.debug("db.open, getting root_id")
+
+            if self.writer and self.writer.append:
+                # append mode for the writer, open writer and get the root id
+                self.log.debug("db.open, write append, getting root_id from writer")
+                self._root_id = self.writer.open()
+                if self.reader:
+                    reader_root_id = self.reader.open()
+                    if reader_root_id != self._root_id:
+                        # TBD: need someway to reconcile if both reader and writer have
+                        # an potentiated idea on what there root id is
+                        self.log.warn("reader root_id does not match writer root_id")
+            elif self.reader:
+                self.log.debug("db.open, getting root_id from reader")
+                self._root_id = self.reader.open()
+                if self.writer:
+                    writer_root_id = self.writer.open()
+                    if writer_root_id != self._root_id:
+                        # TBD: same as above, need to deal with inconsistent root ids
+                        self.log.warning("writer root_id does not match reader root_id")
             else:
-                mode = "r+"
-                self.readonly = False
-
-        self.log.info("init -- filePath: " + filePath + " mode: " + mode)
-
-        self.update_timestamps = update_timestamps
-
-        self.f = h5py.File(filePath, mode, libver="latest")
-
-        self.root_uuid = root_uuid
-
-        if self.readonly:
-            # for read-only files, add a dot in front of the name to be used as
-            # the db file.  This won't collide with actual data files, since
-            # "." is not allowed as the first character in a domain name.
-            if not dbFilePath:
-                dirname = op.dirname(self.f.filename)
-                basename = op.basename(self.f.filename)
-                if len(dirname) > 0:
-                    dbFilePath = dirname + "/." + basename
-                else:
-                    dbFilePath = "." + basename
-            dbMode = "r+"
-            if not op.isfile(dbFilePath):
-                dbMode = "w"
-            self.log.info("dbFilePath: " + dbFilePath + " mode: " + dbMode)
-            self.dbf = h5py.File(dbFilePath, dbMode)
-        else:
-            self.dbf = None  # for read only
-        # create a global reference to this class
-        # so visitObj can call back
-        _db[filePath] = self
+                # no root id set by writer or reader, initialize now
+                self._root_id = createObjId(obj_type="groups")
+                if self.writer:
+                    # open writer in create mode now that we have a root id
+                    self.writer.open()
+
+                # create a root group just as a memory object
+                group_json = {"links": {}, "attributes": {}, "cpl": {}}
+                group_json["created"] = time.time()
+                self._db[self._root_id] = group_json
+
+        self.log.debug(f"db.open() - returning root_id: {self._root_id}")
+        return self._root_id
+
+    def close(self):
+        """ close reader and writer handles """
+        self.log.info("Hdf5db __close")
+        self.flush()
+        if self.writer:
+            self.writer.close()
+        if self.reader:
+            self.reader.close()
+
+    @property
+    def closed(self):
+        return False if self.root_id else True
 
     def __enter__(self):
+        """ called on package init """
         self.log.info("Hdf5db __enter")
         return self
 
     def __exit__(self, type, value, traceback):
+        """ called on package exit """
         self.log.info("Hdf5db __exit")
-        filename = self.f.filename
-        self.f.flush()
-        self.f.close()
-        if self.dbf:
-            self.dbf.flush()
-            self.dbf.close()
-        del _db[filename]
-
-    def getTimeStampName(self, uuid, objType="object", name=None):
-        ts_name = uuid
-        if objType != "object":
-            if len(name) == 0:
-                self.log.error("empty name passed to setCreateTime")
-                raise Exception("bad setCreateTimeParameter")
-            if objType == "attribute":
-                ts_name += "_attr:["
-                ts_name += name
-                ts_name += "]"
-            elif objType == "link":
-                ts_name += "_link:["
-                ts_name += name
-                ts_name += "]"
+        self.close()
+
+    def getObjectById(self, obj_id):
+        """ return object with given id """
+        if obj_id not in self.db:
+            if self.reader:
+                # load the obj from the reader
+                obj_json = self.reader.getObjectById(obj_id)
+                self.db[obj_id] = obj_json
             else:
-                msg = "Bad objType passed to setCreateTime"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-        return ts_name
-
-    """
-      setCreateTime - sets the create time timestamp for the
-            given object.
-        uuid - id of object
-        objtype - one of "object", "link", "attribute"
-        name - name (for attributes, links... ignored for objects)
-        timestamp - time (otherwise current time will be used)
-
-       returns - nothing
-
-       Note - should only be called once per object
-    """
-
-    def setCreateTime(self, uuid, objType="object", name=None, timestamp=None):
-        if not self.update_timestamps:
-            return
-        ctime_grp = self.dbGrp["{ctime}"]
-        ts_name = self.getTimeStampName(uuid, objType, name)
-        if timestamp is None:
-            timestamp = time.time()
-        if ts_name in ctime_grp.attrs:
-            self.log.warning("modifying create time for object: " + ts_name)
-        ctime_grp.attrs.create(ts_name, timestamp, dtype="int64")
-
-    """
-      getCreateTime - gets the create time timestamp for the
-            given object.
-        uuid - id of object
-        objtype - one of "object", "link", "attribute"
-        name - name (for attributes, links... ignored for objects)
-        useRoot - if true, use the time value for root object as default
-
-       returns - create time for object, or create time for root if not set
-    """
-
-    def getCreateTime(self, uuid, objType="object", name=None, useRoot=True):
-        ctime_grp = self.dbGrp["{ctime}"]
-        ts_name = self.getTimeStampName(uuid, objType, name)
-        timestamp = None
-        if ts_name in ctime_grp.attrs:
-            timestamp = ctime_grp.attrs[ts_name]
-        elif useRoot:
-            # return root timestamp
-            root_uuid = self.dbGrp.attrs["rootUUID"]
-            if root_uuid in ctime_grp.attrs:
-                timestamp = ctime_grp.attrs[root_uuid]
-        return timestamp
-
-    """
-      setModifiedTime - sets the modified time timestamp for the
-            given object.
-        uuid - id of object
-        objtype - one of "object", "link", "attribute"
-        name - name (for attributes, links... ignored for objects)
-        timestamp - time (otherwise current time will be used)
+                raise KeyError(f"obj_id: {obj_id} not found")
+        obj_json = self.db[obj_id]
 
-       returns - nothing
+        return obj_json
 
-    """
+    def getObjectIdByPath(self, h5path, parent_id=None):
+        """ Return id for the given link path starting from parent_id if set,
+        otherwise the root_id """
 
-    def setModifiedTime(self, uuid, objType="object", name=None, timestamp=None):
-        if not self.update_timestamps:
-            return
-        mtime_grp = self.dbGrp["{mtime}"]
-        ts_name = self.getTimeStampName(uuid, objType, name)
-        if timestamp is None:
-            timestamp = time.time()
-        mtime_grp.attrs.create(ts_name, timestamp, dtype="int64")
+        if self.closed:
+            self.open()  # initiate db
 
-    """
-      getModifiedTime - gets the modified time timestamp for the
-            given object.
-        uuid - id of object
-        objtype - one of "object", "link", "attribute"
-        name - name (for attributes, links... ignored for objects)
-        useRoot - if true, use the time value for root object as default
-
-       returns - create time for object, or create time for root if not set
-    """
+        if h5path == "/":
+            return self.root_id  # just return root id
 
-    def getModifiedTime(self, uuid, objType="object", name=None, useRoot=True):
-        mtime_grp = self.dbGrp["{mtime}"]
-        ts_name = self.getTimeStampName(uuid, objType, name)
-        timestamp = None
-        if ts_name in mtime_grp.attrs:
-            timestamp = mtime_grp.attrs[ts_name]
-        else:
-            # return create time if no modified time has been set
-            ctime_grp = self.dbGrp["{ctime}"]
-            if ts_name in ctime_grp.attrs:
-                timestamp = ctime_grp.attrs[ts_name]
-            elif useRoot:
-                # return root timestamp
-                root_uuid = self.dbGrp.attrs["rootUUID"]
-                timestamp = mtime_grp.attrs[root_uuid]
-        return timestamp
-
-    """
-      getAclGroup - return the db group "{acl}" if present,
-        otherwise return None
-    """
-
-    def getAclGroup(self, create=False):
-        if not self.dbGrp:
-            return None  # file not initialized
-        if "{acl}" in self.dbGrp:
-            return self.dbGrp["{acl}"]
-        if not create:
-            return None
-        self.dbGrp.create_group("{acl}")
-        return self.dbGrp["{acl}"]
-
-    """
-      getAclDtype - return detype for ACL
-    """
+        if parent_id is None:
+            parent_id = self.root_id
+        self.log.debug(f"getObjectIdDByPath(h5path: {h5path} parent_id: {parent_id}")
 
-    def getAclDtype(self):
-        fields = []
-        fields.append(("userid", np.int32))
-        fields.append(("create", np.int8))
-        fields.append(("read", np.int8))
-        fields.append(("update", np.int8))
-        fields.append(("delete", np.int8))
-        fields.append(("readACL", np.int8))
-        fields.append(("updateACL", np.int8))
-        dt = np.dtype(fields)
-        return dt
+        obj_json = self.getObjectById(parent_id)
+        if obj_json is None:
+            self.log.warning("getObjectIdDByPath - parent_id not found")
+            raise KeyError("parent_id: {parent_id} not found")
 
-    """
-      getAclDataset - return ACL datset for given uuid
-    """
-
-    def getAclDataset(self, obj_uuid, create=False):
-        acl_group = self.getAclGroup(create=create)
-
-        if acl_group is None:
-            return None
-
-        if obj_uuid in acl_group:
-            return acl_group[obj_uuid]
-
-        if not create:
-            return None
-
-        # create dataset
-        dt = self.getAclDtype()
-        acl_group.create_dataset(obj_uuid, (0,), dtype=dt, maxshape=(None,))
-        return acl_group[obj_uuid]
-
-    """
-      getNumAcls - return number of acls associatted with given uuid
-    """
+        obj_id = parent_id
+        searched_ids = set(obj_id)
 
-    def getNumAcls(self, obj_uuid):
-        acl_group = self.getAclGroup()
-        if acl_group is None:
-            return 0
-        if obj_uuid not in acl_group:
-            return 0
-        acls = acl_group[obj_uuid]
-        return acls.shape[0]
-
-    """
-      convertAclNdArrayToDict - helper function - return acl item to dict
-    """
-
-    def convertAclNdArrayToDict(self, acl_ndarray):
-        fields = acl_ndarray.dtype.fields.keys()
-        acl = {}
-        for field in fields:
-            value = int(acl_ndarray[field])
-            acl[field] = value
-        return acl
-
-    def getDefaultAcl(self):
-        """Get default acl - returns dict obj"""
-
-        dt = self.getAclDtype()
-        acl = {}
-        for field in dt.fields.keys():
-            if field == "userid":
-                acl[field] = 0
-            else:
-                acl[field] = 1  # default is allowed
-        return acl
-
-    def getAcl(self, obj_uuid, userid):
-        """
-        getAcl - return ACL for given uuid and userid
-            returns ACL associated with the given uuid, or if none exists,
-            the ACL associatted with the root group.
-
-            If an ACL is not present for a userid/obj and ACL will be returned
-            via the following precedence:
-
-            1) obj_uuid, user_id
-            2) root_uuid, user_id
-            3) obj_uuid, 0
-            4) root_uuid, 0
-            5) 'all perm' ACL
-        """
-        acl_grp = self.getAclGroup()
-
-        if acl_grp is not None:
-            acl = self.getAclByObjAndUser(obj_uuid, userid)
-            if acl is not None:
-                return acl
-
-            if obj_uuid != self.root_uuid and userid != 0:
-                # get the root acl for this user
-                acl = self.getAclByObjAndUser(self.root_uuid, userid)
-                if acl is not None:
-                    return acl
-
-            if userid != 0:
-                # get acl for default user
-                acl = self.getAclByObjAndUser(obj_uuid, 0)
-                if acl is not None:
-                    return acl
-
-            if obj_uuid != self.root_uuid:
-                # get root acl for default user
-                acl = self.getAclByObjAndUser(self.root_uuid, 0)
-                if acl is not None:
-                    return acl
-
-        # create an ACL with default permissions
-        acl = self.getDefaultAcl()
-
-        return acl
-
-    def getAclByObjAndUser(self, obj_uuid, userid):
-        """
-        get ACL for specific uuid and user
-            return None if not found
-        """
-        acl = None
-        acl_dset = self.getAclDataset(obj_uuid)
-
-        if acl_dset:
-            # iterate through elements, looking for user_id
-            acls = acl_dset[...]
-            num_acls = acl_dset.shape[0]
-            acl = None
-            for i in range(num_acls):
-                item = acls[i]
-                if item["userid"] == userid:
-                    acl = item
-                    break
-
-        if acl is not None:
-            acl = self.convertAclNdArrayToDict(acl)
-        return acl
-
-    def getAcls(self, obj_uuid):
-        """
-        getAcls - get all acls for given uuid
-        """
-        acls = []
-        acl_dset = self.getAclDataset(obj_uuid)
-
-        if acl_dset:
-            # iterate through elements, looking for user_id
-            num_acls = acl_dset.shape[0]
-
-            for i in range(num_acls):
-                item = acl_dset[i]
-                acl = self.convertAclNdArrayToDict(item)
-                acls.append(acl)
-
-        return acls
-
-    def setAcl(self, obj_uuid, acl):
-        """
-        setAcl -  set the acl for given uuid.
-        """
-        acl_dset = self.getAclDataset(obj_uuid, create=True)
-
-        if acl_dset is None:
-            msg = "Unexpected error acl not created for uuid:[" + obj_uuid + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        userid = acl["userid"]
-
-        # iterate through elements, looking for user_id
-        acls = acl_dset[...]
-        num_acls = acl_dset.shape[0]
-
-        user_index = None
-
-        for i in range(num_acls):
-            item = acls[i]
-            if item["userid"] == userid:
-                # update this element
-                user_index = i
-                break
-
-        if user_index is None:
-            # userid not found - add row
-            acl_dset.resize(((num_acls + 1),))
-            user_index = num_acls
-
-        # update the acl dataset
-        item = acl_dset[user_index]
-        for field in acl.keys():
-            item[field] = acl[field]
-        acl_dset[user_index] = item  # save back to the file
-
-    def initFile(self):
-        # self.log.info("initFile")
-        if self.readonly:
-            self.dbGrp = self.dbf
-            if "{groups}" in self.dbf:
-                # file already initialized
-                self.root_uuid = self.dbGrp.attrs["rootUUID"]
-                return
-
-        else:
-            if "__db__" in self.f:
-                # file already initialized
-                self.dbGrp = self.f["__db__"]
-                self.root_uuid = self.dbGrp.attrs["rootUUID"]
-                return  # already initialized
-            self.dbGrp = self.f.create_group("__db__")
-
-        self.log.info("initializing file")
-        if not self.root_uuid:
-            self.root_uuid = str(uuid.uuid1())
-        self.dbGrp.attrs["rootUUID"] = self.root_uuid
-        self.dbGrp.create_group("{groups}")
-        self.dbGrp.create_group("{datasets}")
-        self.dbGrp.create_group("{datatypes}")
-        self.dbGrp.create_group("{addr}")  # store object address
-        self.dbGrp.create_group("{ctime}")  # stores create timestamps
-        self.dbGrp.create_group("{mtime}")  # store modified timestamps
-
-        mtime = op.getmtime(self.f.filename)
-        ctime = mtime
-        self.setCreateTime(self.root_uuid, timestamp=ctime)
-        self.setModifiedTime(self.root_uuid, timestamp=mtime)
-
-        self.f.visititems(visitObj)
-
-    def visit(self, path, obj):
-        name = obj.__class__.__name__
-        if len(path) >= 6 and path[:6] == "__db__":
-            return  # don't include the db objects
-        self.log.info("visit: " + path + " name: " + name)
-        col = None
-        if name == "Group":
-            col = self.dbGrp["{groups}"].attrs
-        elif name == "Dataset":
-            col = self.dbGrp["{datasets}"].attrs
-        elif name == "Datatype":
-            col = self.dbGrp["{datatypes}"].attrs
-        else:
-            msg = "Unknown object type: " + __name__ + " found during scan of HDF5 file"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        uuid1 = uuid.uuid1()  # create uuid
-        id = str(uuid1)
-        addrGrp = self.dbGrp["{addr}"]
-        if not self.readonly:
-            # storing db in the file itself, so we can link to the object directly
-            col[id] = obj.ref  # save attribute ref to object
-        else:
-            # store path to object
-            col[id] = obj.name
-        addr = h5py.h5o.get_info(obj.id).addr
-        # store reverse map as an attribute
-        addrGrp.attrs[str(addr)] = id
-
-    #
-    # Get Datset creation properties
-    #
-    def getDatasetCreationProps(self, dset_uuid):
-        prop_list = {}
-        if "{dataset_props}" not in self.dbGrp:
-            # no, group, so no properties
-            return prop_list  # return empty dict
-        dbPropsGrp = self.dbGrp["{dataset_props}"]
-
-        if dset_uuid not in dbPropsGrp.attrs:
-            return prop_list  # return empty dict
-        prop_str = dbPropsGrp.attrs[dset_uuid]
-        # expand json string
-        try:
-            prop_list = json.loads(prop_str)
-        except ValueError as ve:
-            msg = (
-                "Unable to load creation properties for dataset:["
-                + dset_uuid
-                + "]: "
-                + ve.message
-            )
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        # fill in Filter class values
-        if "filters" in prop_list:
-            prop_filters = prop_list["filters"]
-            for prop_filter in prop_filters:
-                if "class" not in prop_filter:
-                    filter_id = prop_filter["id"]
-                    if filter_id in _HDF_FILTERS:
-                        hdf_filter = _HDF_FILTERS[filter_id]
-                        prop_filter["class"] = hdf_filter["class"]
-                    else:
-                        prop_filter["class"] = "H5Z_FILTER_USER"
-
-        return prop_list
-
-    #
-    # Set dataset creation property
-    #
-    def setDatasetCreationProps(self, dset_uuid, prop_dict):
-        self.log.info("setDataProp([" + dset_uuid + "]")
-        if not prop_dict:
-            # just ignore if empty dictionary
-            return
-        if "{dataset_props}" not in self.dbGrp:
-            self.dbGrp.create_group("{dataset_props}")
-        dbPropsGrp = self.dbGrp["{dataset_props}"]
-        if dset_uuid in dbPropsGrp.attrs:
-            # this should be write once
-            msg = (
-                "Unexpected error setting dataset creation properties for dataset:["
-                + dset_uuid
-                + "]"
-            )
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        prop_str = json.dumps(prop_dict)
-        dbPropsGrp.attrs[dset_uuid] = prop_str
-
-    def getUUIDByAddress(self, addr):
-        if "{addr}" not in self.dbGrp:
-            self.log.error("expected to find {addr} group")
-            return None
-        addrGrp = self.dbGrp["{addr}"]
-        obj_uuid = None
-        if str(addr) in addrGrp.attrs:
-            obj_uuid = addrGrp.attrs[str(addr)]
-        if obj_uuid and type(obj_uuid) is not str:
-            # convert bytes to unicode
-            obj_uuid = obj_uuid.decode("utf-8")
-        return obj_uuid
-
-    def getNumLinksToObjectInGroup(self, grp, obj):
-        """
-        Get the number of links in a group to an object
-        """
-        objAddr = h5py.h5o.get_info(obj.id).addr
-        numLinks = 0
-        for name in grp:
-            try:
-                child = grp[name]
-            except KeyError:
-                # UDLink? Ignore for now
-                self.log.info("ignoring link (UDLink?): " + name)
+        link_names = h5path.split('/')
+        self.log.debug(f"link_names: {link_names}")
+        for link_name in link_names:
+            if not link_name:
                 continue
-
-            addr = h5py.h5o.get_info(child.id).addr
-            if addr == objAddr:
-                numLinks = numLinks + 1
-
-        return numLinks
-
-    def getNumLinksToObject(self, obj):
-        """
-        Get the number of links to the given object
-        """
-        self.initFile()
-        groups = self.dbGrp["{groups}"]
-        numLinks = 0
-        # iterate through each group in the file and unlink tgt if it is linked
-        # by the group
-        for uuidName in groups:
-            # iterate through anonymous groups
-            grp = groups[uuidName]
-            nLinks = self.getNumLinksToObjectInGroup(grp, obj)
-            if nLinks > 0:
-                numLinks += nLinks
-        for uuidName in groups.attrs:
-            # now non anonymous groups
-            grpRef = groups.attrs[uuidName]
-            grp = self.f[grpRef]  # dereference
-            nLinks = self.getNumLinksToObjectInGroup(grp, obj)
-            if nLinks > 0:
-                numLinks += nLinks
-        # finally, check the root group
-        root = self.getObjByPath("/")
-        nLinks = self.getNumLinksToObjectInGroup(root, obj)
-        numLinks += nLinks
-
-        return numLinks
-
-    def getUUIDByPath(self, path):
-        self.initFile()
-        self.log.info("getUUIDByPath: [" + path + "]")
-        if len(path) >= 6 and path[:6] == "__db__":
-            msg = "getUUIDByPath called with invalid path: [" + path + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        if path == "/":
-            # just return the root UUID
-            root_uuid = self.dbGrp.attrs["rootUUID"]
-            if root_uuid and type(root_uuid) is not str:
-                # convert bytes to unicode
-                root_uuid = root_uuid.decode("utf-8")
-            return root_uuid
-
-        obj = self.f[path]  # will throw KeyError if object doesn't exist
-        addr = h5py.h5o.get_info(obj.id).addr
-        obj_uuid = self.getUUIDByAddress(addr)
-        return obj_uuid
-
-    def getObjByPath(self, path):
-        if len(path) >= 6 and path[:6] == "__db__":
-            return None  # don't include the db objects
-        obj = self.f[path]  # will throw KeyError if object doesn't exist
-        return obj
-
-    def getObjectByUuid(self, col_type, obj_uuid):
-        # col_type should be either "datasets", "groups", or "datatypes"
-        if col_type not in ("datasets", "groups", "datatypes"):
-            msg = "Unexpectd error, invalid col_type: [" + col_type + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        if col_type == "groups" and obj_uuid == self.dbGrp.attrs["rootUUID"]:
-            return self.f["/"]  # returns root group
-
-        obj = None  # Group, Dataset, or Datatype
-        col_name = "{" + col_type + "}"
-        # get the collection group for this collection type
-        col = self.dbGrp[col_name]
-        if obj_uuid in col.attrs:
-            ref = col.attrs[obj_uuid]
-            obj = self.f[ref]  # this works for read-only as well
-        elif obj_uuid in col:
-            # anonymous object
-            obj = col[obj_uuid]
-
-        return obj
-
-    def getDatasetObjByUuid(self, obj_uuid):
-        self.initFile()
-        self.log.info("getDatasetObjByUuid(" + obj_uuid + ")")
-
-        obj = self.getObjectByUuid("datasets", obj_uuid)
-
-        return obj
-
-    def getGroupObjByUuid(self, obj_uuid):
-        self.initFile()
-        self.log.info("getGroupObjByUuid(" + obj_uuid + ")")
-
-        obj = self.getObjectByUuid("groups", obj_uuid)
-
-        return obj
-
-    def getDatasetTypeItemByUuid(self, obj_uuid):
-        dset = self.getDatasetObjByUuid(obj_uuid)  # throws exception if not found
-        item = {"id": obj_uuid}
-        item["type"] = getTypeItem(dset.dtype)
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-
-        return item
-
-    def getNullReference(self):
-        """
-        getNullReference - return a null object reference
-        """
-        tmpGrp = None
-        if "{tmp}" not in self.dbGrp:
-            tmpGrp = self.dbGrp.create_group("{tmp}")
-        else:
-            tmpGrp = self.dbGrp["{tmp}"]
-        if "nullref" not in tmpGrp:
-            dt = h5py.special_dtype(ref=h5py.Reference)
-            tmpGrp.create_dataset("nullref", (1,), dtype=dt)
-        nullref_dset = tmpGrp["nullref"]
-        return nullref_dset[0]
-
-    def getNullRegionReference(self):
-        """
-        getNullRegionReference - return a null region reference
-        """
-        tmpGrp = None
-        if "{tmp}" not in self.dbGrp:
-            tmpGrp = self.dbGrp.create_group("{tmp}")
-        else:
-            tmpGrp = self.dbGrp["{tmp}"]
-            if "nullregref" not in tmpGrp:
-                dt = h5py.special_dtype(ref=h5py.RegionReference)
-                tmpGrp.create_dataset("nullregref", (1,), dtype=dt)
-                nullregref_dset = tmpGrp["nullregref"]
-                return nullregref_dset[0]
-
-    def getShapeItemByDsetObj(self, obj):
-        item = {}
-        if obj.shape is None:
-            # new with h5py 2.6, null space datasets will return None for shape
-            item["class"] = "H5S_NULL"
-        elif len(obj.shape) == 0:
-            # check to see if this is a null space vs a scalar dataset we'll do
-            # this by seeing if an exception is raised when reading the dataset
-            # h5py issue https://github.com/h5py/h5py/issues/279 will provide a
-            # better way to determine null spaces
-            # Update 3/10/17: Above issue is closed, but waiting on 2.7 final release
-            try:
-                val = obj[...]
-                if val is None:
-                    self.log.warning("no value returned for scalar dataset")
-                item["class"] = "H5S_SCALAR"
-            except IOError:
-                item["class"] = "H5S_NULL"
-        else:
-            item["class"] = "H5S_SIMPLE"
-            item["dims"] = obj.shape
-            maxshape = []
-            include_maxdims = False
-            for i in range(len(obj.shape)):
-                extent = 0
-                if len(obj.maxshape) > i:
-                    extent = obj.maxshape[i]
-                    if extent is None:
-                        extent = 0
-                    if extent > obj.shape[i] or extent == 0:
-                        include_maxdims = True
-                maxshape.append(extent)
-            if include_maxdims:
-                item["maxdims"] = maxshape
-        return item
-
-    def getShapeItemByAttrObj(self, obj):
-        item = {}
-        if obj.shape is None or obj.get_storage_size() == 0:
-            # If storage size is 0, assume this is a null space obj
-            # See: h5py issue https://github.com/h5py/h5py/issues/279
-            item["class"] = "H5S_NULL"
-        else:
-            if obj.shape:
-                item["class"] = "H5S_SIMPLE"
-                item["dims"] = obj.shape
-            else:
-                item["class"] = "H5S_SCALAR"
-        return item
-
-    #
-    # Get dataset creation properties maintained by HDF5 library
-    #
-    def getHDF5DatasetCreationProperties(self, obj_uuid, type_class):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        #
-        # Fill in creation properties
-        #
-        creationProps = {}
-        plist = h5py.h5d.DatasetID.get_create_plist(dset.id)
-
-        # alloc time
-        nAllocTime = plist.get_alloc_time()
-        if nAllocTime == h5py.h5d.ALLOC_TIME_DEFAULT:
-            creationProps["allocTime"] = "H5D_ALLOC_TIME_DEFAULT"
-        elif nAllocTime == h5py.h5d.ALLOC_TIME_LATE:
-            creationProps["allocTime"] = "H5D_ALLOC_TIME_LATE"
-        elif nAllocTime == h5py.h5d.ALLOC_TIME_EARLY:
-            creationProps["allocTime"] = "H5D_ALLOC_TIME_EARLY"
-        elif nAllocTime == h5py.h5d.ALLOC_TIME_INCR:
-            creationProps["allocTime"] = "H5D_ALLOC_TIME_INCR"
-        else:
-            self.log.warning("Unknown alloc time value: " + str(nAllocTime))
-
-        # fill time
-        nFillTime = plist.get_fill_time()
-        if nFillTime == h5py.h5d.FILL_TIME_ALLOC:
-            creationProps["fillTime"] = "H5D_FILL_TIME_ALLOC"
-        elif nFillTime == h5py.h5d.FILL_TIME_NEVER:
-            creationProps["fillTime"] = "H5D_FILL_TIME_NEVER"
-        elif nFillTime == h5py.h5d.FILL_TIME_IFSET:
-            creationProps["fillTime"] = "H5D_FILL_TIME_IFSET"
-        else:
-            self.log.warning("unknown fill time value: " + str(nFillTime))
-
-        if type_class not in ("H5T_VLEN", "H5T_OPAQUE"):
-            if plist.fill_value_defined() == h5py.h5d.FILL_VALUE_USER_DEFINED:
-                creationProps["fillValue"] = self.bytesArrayToList(dset.fillvalue)
-
-        # layout
-        nLayout = plist.get_layout()
-        if nLayout == h5py.h5d.COMPACT:
-            creationProps["layout"] = {"class": "H5D_COMPACT"}
-        elif nLayout == h5py.h5d.CONTIGUOUS:
-            creationProps["layout"] = {"class": "H5D_CONTIGUOUS"}
-        elif nLayout == h5py.h5d.CHUNKED:
-            creationProps["layout"] = {"class": "H5D_CHUNKED", "dims": dset.chunks}
-        else:
-            self.log.warning("Unknown layout value:" + str(nLayout))
-
-        num_filters = plist.get_nfilters()
-        filter_props = []
-        if num_filters:
-            for n in range(num_filters):
-                filter_info = plist.get_filter(n)
-                opt_values = filter_info[2]
-                filter_prop = {}
-                filter_id = filter_info[0]
-                filter_prop["id"] = filter_id
-                if filter_info[3]:
-                    filter_prop["name"] = self.bytesArrayToList(filter_info[3])
-                if filter_id in _HDF_FILTERS:
-                    hdf_filter = _HDF_FILTERS[filter_id]
-                    filter_prop["class"] = hdf_filter["class"]
-                    if "options" in hdf_filter:
-                        filter_opts = hdf_filter["options"]
-                        for i in range(len(filter_opts)):
-                            if len(opt_values) <= i:
-                                break  # end of option values
-                            opt_value = opt_values[i]
-                            opt_value_enum = None
-                            option_name = filter_opts[i]
-                            if option_name in _HDF_FILTER_OPTION_ENUMS:
-                                option_enums = _HDF_FILTER_OPTION_ENUMS[option_name]
-                                if opt_value in option_enums:
-                                    opt_value_enum = option_enums[opt_value]
-                            if opt_value_enum:
-                                filter_prop[option_name] = opt_value_enum
-                            else:
-                                filter_prop[option_name] = opt_value
-                else:
-                    # custom filter
-                    filter_prop["class"] = "H5Z_FILTER_USER"
-                    if opt_values:
-                        filter_prop["parameters"] = opt_values
-                filter_props.append(filter_prop)
-            creationProps["filters"] = filter_props
-
-        return creationProps
-
-    #
-    # Get dataset information - type, shape, num attributes, creation properties
-    #
-    def getDatasetItemByUuid(self, obj_uuid):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        if dset is None:
-            if self.getModifiedTime(obj_uuid, useRoot=False):
-                msg = "Dataset with uuid: " + obj_uuid + " has been previously deleted"
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
+            link_tgt = None
+            self.log.debug(f"link_name: {link_name}")
+            if not obj_id:
+                break
+            if 'links' not in obj_json:
+                self.log.error(f"expected to find links key in: {obj_json}")
+                raise KeyError(h5path)
+            links = obj_json['links']
+            self.log.debug(f"links: {links}")
+            if link_name not in links:
+                self.log.warning(f"link: {link_name} not found in {obj_id}")
+                self.log.debug(f"links: {links}")
+                raise KeyError(h5path)
+            link_tgt = links[link_name]
+            self.log.debug(f"link_tgt: {link_tgt}")
+            link_class = link_tgt['class']
+            obj_id = None
+            obj_json = None
+            if link_class == 'H5L_TYPE_HARD':
+                # hard link
+                obj_id = link_tgt['id']
+                if obj_id in searched_ids:
+                    self.log.warning(f"circular reference using path: {h5path}")
+                    raise KeyError(h5path)
+                obj_json = self.getObjectById(obj_id)
+                searched_ids.add(obj_id)
+            elif link_class == 'H5L_TYPE_SOFT':
+                self.log.warning("getObjectIdByPath can't follow soft links")
+            elif link_class == 'H5L_TYPE_EXTERNAL':
+                self.log.warning("getObjectIdByPath can't follow external links")
             else:
-                msg = "Dataset with uuid: " + obj_uuid + " was not found"
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-
-        # fill in the item info for the dataset
-        item = {"id": obj_uuid}
-
-        alias = []
-        if dset.name and not dset.name.startswith("/__db__"):
-            alias.append(dset.name)  # just use the default h5py path for now
-        item["alias"] = alias
-
-        item["attributeCount"] = len(dset.attrs)
-
-        # check if the dataset is using a committed type
-        typeid = h5py.h5d.DatasetID.get_type(dset.id)
-        typeItem = None
-        if h5py.h5t.TypeID.committed(typeid):
-            type_uuid = None
-            addr = h5py.h5o.get_info(typeid).addr
-            type_uuid = self.getUUIDByAddress(addr)
-            committedType = self.getCommittedTypeItemByUuid(type_uuid)
-            typeItem = committedType["type"]
-            typeItem["uuid"] = type_uuid
-        else:
-            typeItem = getTypeItem(dset.dtype)
+                self.log.error(f"link type: {link_class} not supported")
 
-        item["type"] = typeItem
+            if not obj_id:
+                self.log.warning(f"get_bypath {h5path} not found")
+                raise KeyError(h5path)
+        return obj_id
 
-        # get shape
-        item["shape"] = self.getShapeItemByDsetObj(dset)
-
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-
-        creationProps = self.getDatasetCreationProps(obj_uuid)
-        if creationProps:
-            # if chunks is not in the db props, add it from the dataset prop
-            # (so auto-chunk values can be returned)
-            if dset.chunks and "layout" not in creationProps:
-                creationProps["layout"] = {"class": "H5D_CHUNKED", "dims": dset.chunks}
-        else:
-            # no db-tracked creation properties, pull properties from library
-            creationProps = self.getHDF5DatasetCreationProperties(
-                obj_uuid, typeItem["class"]
-            )
+    def getObjectByPath(self, path):
+        """ Get Object JSON at given path """
+        obj_id = self.getObjectIdByPath(path)
+        obj_json = self.getObjectById(obj_id)
+        return obj_json
 
-        if creationProps:
-            item["creationProperties"] = creationProps
+    def getDtype(self, obj_json):
+        """ Return numpy data type for given object id
+        """
 
-        return item
+        if "type" not in obj_json:
+            # group id?
+            raise TypeError(f"{obj_json} does not have a datatype")
+        type_item = obj_json["type"]
+        if isValidUuid(type_item) and getCollectionForId(type_item) == "datatypes":
+            ctype_id = "t-" + getUuidFromId(type_item)
+            ctype_json = self.getObjectById(ctype_id)
+            if ctype_json is None:
+                raise KeyError(f"ctype: {ctype_id} not found")
 
-    def createTypeFromItem(self, attr_type):
-        """
-        createTypeFromItem - create type given dictionary definition
-        """
-        dt = None
-
-        if isinstance(attr_type, (str, bytes)) and len(attr_type) == UUID_LEN:
-            # assume attr_type is a uuid of a named datatype
-            tgt = self.getCommittedTypeObjByUuid(attr_type)
-            if tgt is None:
-                msg = (
-                    "Unable to create attribute, committed type with uuid of: "
-                    + attr_type
-                    + " not found"
-                )
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-            dt = tgt  # can use the object as the dt parameter
-        else:
-            try:
-                dt = createDataType(attr_type)
-            except KeyError as ke:
-                msg = "Unable to create type: " + str(ke)
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            except TypeError as te:
-                msg = "Unable to create type: " + str(te)
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            if dt is None:
-                msg = "Unexpected error creating type"
-                self.log.error(msg)
-                raise IOError(errno, errno.EIO, msg)
-        return dt
-
-    def createCommittedType(self, datatype, obj_uuid=None):
-        """
-        createCommittedType - creates new named datatype
-        Returns item
-        """
-        self.log.info("createCommittedType")
-        self.initFile()
-        if self.readonly:
-            msg = "Can't create committed type (updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        datatypes = self.dbGrp["{datatypes}"]
-        if not obj_uuid:
-            obj_uuid = str(uuid.uuid1())
-        dt = self.createTypeFromItem(datatype)
-
-        datatypes[obj_uuid] = dt
-
-        if obj_uuid not in datatypes:
-            msg = "Unexpected failure to create committed datatype"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        newType = datatypes[obj_uuid]  # this will be a h5py Datatype class
-        # store reverse map as an attribute
-        addr = h5py.h5o.get_info(newType.id).addr
-        addrGrp = self.dbGrp["{addr}"]
-        addrGrp.attrs[str(addr)] = obj_uuid
-        # set timestamp
-        now = time.time()
-        self.setCreateTime(obj_uuid, timestamp=now)
-        self.setModifiedTime(obj_uuid, timestamp=now)
-        item = {"id": obj_uuid}
-        item["attributeCount"] = len(newType.attrs)
-        # item['type'] = hdf5dtype.getTypeItem(datatype.dtype)
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-        return item
-
-    def getCommittedTypeObjByUuid(self, obj_uuid):
-        """
-        getCommittedTypeObjByUuid - get obj from {datatypes} collection
-        Returns type obj
-        """
-        self.log.info("getCommittedTypeObjByUuid(" + obj_uuid + ")")
-        self.initFile()
-        datatype = None
-        datatypesGrp = self.dbGrp["{datatypes}"]
-        if obj_uuid in datatypesGrp.attrs:
-            typeRef = datatypesGrp.attrs[obj_uuid]
-            # typeRef could be a reference or (for read-only) a path
-            datatype = self.f[typeRef]
-        elif obj_uuid in datatypesGrp:
-            datatype = datatypesGrp[obj_uuid]  # non-linked type
+            type_json = ctype_json["type"].copy()
+            type_json["id"] = ctype_id
+            dtype = createDataType(type_json)
         else:
-            msg = "Committed datatype: " + obj_uuid + " not found"
-            self.log.info(msg)
+            dtype = createDataType(type_item)
 
-        return datatype
+        return dtype
 
-    def getCommittedTypeItemByUuid(self, obj_uuid):
-        """
-        getCommittedTypeItemByUuid - get json from {datatypes} collection
-        Returns type obj
-        """
-        self.log.info("getCommittedTypeItemByUuid(" + obj_uuid + ")")
-        self.initFile()
-        datatype = self.getCommittedTypeObjByUuid(obj_uuid)
-
-        if datatype is None:
-            if self.getModifiedTime(obj_uuid, useRoot=False):
-                msg = "Datatype with uuid: " + obj_uuid + " has been previously deleted"
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
-            else:
-                msg = "Datatype with uuid: " + obj_uuid + " was not found"
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-
-        item = {"id": obj_uuid}
-        alias = []
-        if datatype.name and not datatype.name.startswith("/__db__"):
-            alias.append(datatype.name)  # just use the default h5py path for now
-        item["alias"] = alias
-        item["attributeCount"] = len(datatype.attrs)
-        item["type"] = getTypeItem(datatype.dtype)
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-
-        return item
-
-    def getAttributeItemByObj(self, obj, name, includeData=True):
+    def getAttribute(self, obj_id, name, includeData=True):
         """
-        Get attribute given an object and name
+        Get attribute given an object id and name
         returns: JSON object
         """
-        if name not in obj.attrs:
-            msg = "Attribute: [" + name + "] not found in object: " + obj.name
-            self.log.info(msg)
-            return None
 
-        # get the attribute!
-        attrObj = h5py.h5a.open(obj.id, np.bytes_(name))
-        attr = None
-
-        item = {"name": name}
-
-        # check if the dataset is using a committed type
-        typeid = attrObj.get_type()
-        typeItem = None
-        if h5py.h5t.TypeID.committed(typeid):
-            type_uuid = None
-            addr = h5py.h5o.get_info(typeid).addr
-            type_uuid = self.getUUIDByAddress(addr)
-            committedType = self.getCommittedTypeItemByUuid(type_uuid)
-            typeItem = committedType["type"]
-            typeItem["uuid"] = type_uuid
-        else:
-            typeItem = getTypeItem(attrObj.dtype)
-        item["type"] = typeItem
-        # todo - don't include data for OPAQUE until JSON serialization
-        # issues are addressed
+        obj_json = self.getObjectById(obj_id)
+        attrs = obj_json["attributes"]
 
-        if isinstance(typeItem, dict) and typeItem["class"] in ("H5T_OPAQUE"):
-            includeData = False
-
-        shape_json = self.getShapeItemByAttrObj(attrObj)
-        item["shape"] = shape_json
-        if shape_json["class"] == "H5S_NULL":
-            includeData = False
-        if includeData:
-            try:
-                attr = obj.attrs[name]  # returns a numpy array
-            except TypeError:
-                self.log.warning("type error reading attribute")
-
-        if includeData and attr is not None:
-            if shape_json["class"] == "H5S_SCALAR":
-                data = self.getDataValue(typeItem, attr)
-            else:
-                dims = shape_json["dims"]
-                rank = len(dims)
-                # convert numpy object to python list
-                # values = self.toList(typeItem, attr)
-                data = self.toList(rank, typeItem, attr)
-            # data = self.bytesToString(data)
-            item["value"] = data
-        # timestamps will be added by getAttributeItem()
-        return item
-
-    def getAttributeItems(self, col_type, obj_uuid, marker=None, limit=0):
-        self.log.info("db.getAttributeItems(" + obj_uuid + ")")
-        if marker:
-            self.log.info("...marker: " + marker)
-        if limit:
-            self.log.info("...limit: " + str(limit))
-
-        self.initFile()
-        obj = self.getObjectByUuid(col_type, obj_uuid)
-        if obj is None:
-            msg = "Object: " + obj_uuid + " could not be loaded"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        items = []
-        gotMarker = True
-        if marker is not None:
-            gotMarker = False
-        count = 0
-        for name in obj.attrs:
-            if not gotMarker:
-                if name == marker:
-                    gotMarker = True
-                    continue  # start filling in result on next pass
-                else:
-                    continue  # keep going!
-            item = self.getAttributeItemByObj(obj, name, False)
-            # mix-in timestamps
-            if self.update_timestamps:
-                item["ctime"] = self.getCreateTime(
-                    obj_uuid, objType="attribute", name=name
-                )
-                item["mtime"] = self.getModifiedTime(
-                    obj_uuid, objType="attribute", name=name
-                )
-
-            items.append(item)
-            count += 1
-            if limit > 0 and count == limit:
-                break  # return what we got
-        return items
-
-    def getAttributeItem(self, col_type, obj_uuid, name):
-        self.log.info(
-            "getAttributeItemByUuid(" + col_type + ", " + obj_uuid + ", " + name + ")"
-        )
-        self.initFile()
-        obj = self.getObjectByUuid(col_type, obj_uuid)
-        if obj is None:
-            msg = "Parent object: " + obj_uuid + " of attribute not found"
+        if name not in attrs:
+            msg = f"Attribute: [{name}] not found in object: {obj_id}"
             self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
             return None
-        item = self.getAttributeItemByObj(obj, name)
-        if item is None:
-            if self.getModifiedTime(
-                obj_uuid, objType="attribute", name=name, useRoot=False
-            ):
-                # attribute has been removed
-                msg = (
-                    "Attribute: ["
-                    + name
-                    + "] of object: "
-                    + obj_uuid
-                    + " has been previously deleted"
-                )
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
-            msg = "Attribute: [" + name + "] of object: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        # mix-in timestamps
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid, objType="attribute", name=name)
-            item["mtime"] = self.getModifiedTime(
-                obj_uuid, objType="attribute", name=name
-            )
-
-        return item
-
-    def isDimensionList(self, attr_name, attr_type):
-        """
-        isDimensionList - return True if this attribute json looks like a dimension list
-        """
-        if attr_name != "DIMENSION_LIST":
-            return False
-        if type(attr_type) is not dict:
-            return False
-        if attr_type["class"] != "H5T_VLEN":
-            return False
-        base_type = attr_type["base"]
-        if base_type["class"] != "H5T_REFERENCE":
-            return False
-        return True
-
-    def isReferenceList(self, attr_name, attr_type):
-        """
-        isReferenceList - return True if this attribute json looks like a reference list
-        """
-        if attr_name != "REFERENCE_LIST":
-            return False
-        if type(attr_type) is not dict:
-            return False
-        if attr_type["class"] != "H5T_COMPOUND":
-            return False
-
-        return True
-
-    def makeDimensionList(self, obj, shape, value):
-        """
-        makeDimensionList - work-around for h5py problems saving dimension list -
-            types which are vlen's of references are not working directly, so use dim_scale api
-            Note: this is a work-around for h5py issue:
-            https://github.com/h5py/h5py/issues/553
-        """
-        dset_refs = self.listToRef(value)
-        for i in range(len(dset_refs)):
-            refs = dset_refs[i]
-            if type(refs) not in (list, tuple):
-                msg = "Invalid dimension list value"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            for j in range(len(refs)):
-                scale_obj = self.f[refs[j]]
-                if scale_obj is None:
-                    self.log.warning(
-                        "dimension list, missing obj reference: " + value[i]
-                    )
-                    continue
-                if "CLASS" not in scale_obj.attrs:
-                    self.log.warning("dimension list, no scale obj")
-                    continue
-                if scale_obj.attrs["CLASS"] != b"DIMENSION_SCALE":
-                    self.log.warning("dimension list, invalid class for scale obj")
-                    continue
+        if attrs[name] is None:
+            msg = f"Attribute: [{name}] has been deleted"
+            self.log.info(None)
+            return None
 
-                try:
-                    h5py.h5ds.attach_scale(obj.id, scale_obj.id, i)
-                except RuntimeError:
-                    self.log.error("got runtime error attaching scale")
+        attr_json = attrs[name]
 
-    def writeNdArrayToAttribute(self, attrs, attr_name, npdata, shape, dt):
-        """
-        writeNdArrayToAttribute - create an attribute given numpy array
-        """
-        attrs.create(attr_name, npdata, shape=shape, dtype=dt)
+        return attr_json
 
-    def makeNullTermStringAttribute(self, obj, attr_name, strLength, value):
-        """
-        create a scalar string attribute using nullterm padding
-        """
-        self.log.info(
-            "make nullterm, length: " + str(strLength) + " value:" + str(value)
-        )
-        value = str(value)
-        if strLength < len(value):
-            self.log.warning(
-                "makeNullTermStringAttribute: value string longer than length"
-            )
-            # value = value[:strLength]  # truncate to length
-
-        if isinstance(attr_name, str):
-            try:
-                attr_name = attr_name.encode("ascii")
-            except UnicodeDecodeError:
-                raise TypeError("non-ascii attribute name not allowed")
-
-        # create the attribute
-        tid = h5py.h5t.TypeID.copy(h5py.h5t.C_S1)
-        tid.set_size(strLength)
-        tid.set_strpad(h5py.h5t.STR_NULLTERM)
-        sid = h5py.h5s.create(h5py.h5s.SCALAR)
-        aid = h5py.h5a.create(obj.id, attr_name, tid, sid)
-        # write the value
-        dtype_code = "S" + str(strLength)
-        ndarr = np.array(value, dtype=np.dtype(dtype_code))
-        aid.write(ndarr)
-
-    def makeAttribute(self, obj, attr_name, shape, attr_type, value):
+    def getAttributes(self, obj_id):
         """
-        makeAttribute - create an attribute (except for dimension list
-        attribute)
+        Get attributes given an object id and name
+        returns: JSON object
         """
-        is_committed_type = False
-        if isinstance(attr_type, str) and len(attr_type) == UUID_LEN:
-            # assume attr_type is a uuid of a named datatype
-            is_committed_type = True
 
-        dt = self.createTypeFromItem(attr_type)
+        obj_json = self.getObjectById(obj_id)
+        attrs = obj_json["attributes"]
+        names = []
+        for name in attrs:
+            if attrs[name] is not None:
+                names.append(name)
 
-        if shape is None:
-            self.log.info("shape is null - will create null space attribute")
-            # create null space attribute
-            # null space datasets/attributes not supported in h5py yet:
-            # See: https://github.com/h5py/h5py/issues/279
-            # work around this by using low-level interface.
-            # first create a temp scalar dataset so we can pull out the typeid
-            tmpGrp = None
-            if "{tmp}" not in self.dbGrp:
-                tmpGrp = self.dbGrp.create_group("{tmp}")
-            else:
-                tmpGrp = self.dbGrp["{tmp}"]
-            tmpGrp.attrs.create(attr_name, 0, shape=(), dtype=dt)
-            b_attr_name = attr_name.encode("utf-8")
-            tmpAttr = h5py.h5a.open(tmpGrp.id, name=b_attr_name)
-            if not tmpAttr:
-                msg = "Unexpected error creating datatype for nullspace attribute"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            tid = tmpAttr.get_type()
-            sid = sid = h5py.h5s.create(h5py.h5s.NULL)
-            # now create the permanent attribute
-            if attr_name in obj.attrs:
-                self.log.info("deleting attribute: " + attr_name)
-                del obj.attrs[attr_name]
-            attr_id = h5py.h5a.create(obj.id, b_attr_name, tid, sid)
-            # delete the temp attribute
-            del tmpGrp.attrs[attr_name]
-            if not attr_id:
-                msg = "Unexpected error creating nullspace attribute"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-        else:
-            if type(value) is tuple:
-                value = list(value)
-            if type(shape) is list:
-                shape = tuple(shape)
-            if not is_committed_type:
-                # apparently committed types can not be used as reference types
-                # todo - verify why that is
-
-                rank = len(shape)
-                # convert python list to numpy object
-                strPad = None
-                strLength = 0
-                if (
-                    isinstance(attr_type, dict)
-                    and attr_type["class"] == "H5T_STRING"
-                    and "strPad" in attr_type
-                ):
-                    strPad = attr_type["strPad"]
-                    strLength = attr_type["length"]
-
-                if (
-                    rank == 0
-                    and isinstance(strLength, int)
-                    and strPad == "H5T_STR_NULLTERM"
-                ):
-                    self.makeNullTermStringAttribute(obj, attr_name, strLength, value)
-                else:
-                    typeItem = getTypeItem(dt)
-                    value = self.toRef(rank, typeItem, value)
-
-                    # create numpy array
-                    npdata = np.zeros(shape, dtype=dt)
-
-                    if rank == 0:
-                        npdata[()] = self.toNumPyValue(attr_type, value, npdata[()])
-                    else:
-                        self.toNumPyArray(rank, attr_type, value, npdata)
-
-                    self.writeNdArrayToAttribute(
-                        obj.attrs, attr_name, npdata, shape, dt
-                    )
+        return names
 
-    """
-    createAttribute - create an attribute
-    """
-
-    def createAttribute(self, col_name, obj_uuid, attr_name, shape, attr_type, value):
-        self.log.info("createAttribute: [" + attr_name + "]")
-
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create attribute (updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        obj = self.getObjectByUuid(col_name, obj_uuid)
-        if not obj:
-            msg = "Object with uuid: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        if self.isDimensionList(attr_name, attr_type):
-            self.makeDimensionList(obj, shape, value)
-        elif self.isReferenceList(attr_name, attr_type):
-            pass  # Skip since reference list will be created by attach scale
+    def getAttributeValue(self, obj_id, name):
+        """ Return NDArray of the given attribute value """
+        attr_json = self.getAttribute(obj_id, name)
+        shape_json = attr_json["shape"]
+        if shape_json["class"] == "H5S_NULL":
+            # no value for empty shape attributes
+            return None
+        elif shape_json["class"] == "H5S_SCALAR":
+            dims = ()
         else:
-            self.makeAttribute(obj, attr_name, shape, attr_type, value)
-
-        now = time.time()
-        self.setCreateTime(obj_uuid, objType="attribute", name=attr_name, timestamp=now)
-        self.setModifiedTime(
-            obj_uuid, objType="attribute", name=attr_name, timestamp=now
-        )
-        self.setModifiedTime(obj_uuid, timestamp=now)  # owner entity is modified
-
-    def deleteAttribute(self, col_name, obj_uuid, attr_name):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to delete attribute (updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        obj = self.getObjectByUuid(col_name, obj_uuid)
-
-        if attr_name not in obj.attrs:
-            msg = (
-                "Attribute with name: ["
-                + attr_name
-                + "] of object: "
-                + obj_uuid
-                + " not found"
-            )
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
+            dims = shape_json["dims"]
+        dtype = self.getDtype(attr_json)
 
-        del obj.attrs[attr_name]
-        now = time.time()
-        self.setModifiedTime(
-            obj_uuid, objType="attribute", name=attr_name, timestamp=now
-        )
+        value = attr_json["value"]
+        arr = jsonToArray(dims, dtype, value)
 
-        return True
-
-    """
-      Return a json-serializable representation of the numpy value
-    """
+        return arr
 
-    def getDataValue(self, typeItem, value, dimension=0, dims=None):
-        if dimension > 0:
-            if type(dims) not in (list, tuple):
-                msg = "unexpected type for type array dimensions"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            out = []
-            rank = len(dims)
-            if dimension > rank:
-                msg = "unexpected dimension for type array"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            nElements = dims[rank - dimension]
-            for i in range(nElements):
-                item_value = self.getDataValue(
-                    typeItem, value[i], dimension=(dimension - 1), dims=dims
-                )
-                out.append(item_value)
-            return out  # done for array case
-
-        out = None
-        typeClass = typeItem["class"]
-        if isinstance(value, (np.ndarray, np.generic)):
-            value = value.tolist()  # convert numpy object to list
-        if typeClass == "H5T_COMPOUND":
-            if type(value) not in (list, tuple):
-                msg = "Unexpected type for compound value"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            fields = typeItem["fields"]
-            if len(fields) != len(value):
-                msg = "Number of elements in compound type does not match type"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            nFields = len(fields)
-            out = []
-            for i in range(nFields):
-                field = fields[i]
-                item_value = self.getDataValue(field["type"], value[i])
-                out.append(item_value)
-        elif typeClass == "H5T_VLEN":
-            if type(value) not in (list, tuple):
-                msg = "Unexpected type for vlen value"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            baseType = typeItem["base"]
-            out = []
-            nElements = len(value)
-            for i in range(nElements):
-                item_value = self.getDataValue(baseType, value[i])
-                out.append(item_value)
-        elif typeClass == "H5T_REFERENCE":
-            out = self.refToList(value)
-        elif typeClass == "H5T_OPAQUE":
-            out = "???"  # todo
-        elif typeClass == "H5T_ARRAY":
-            type_dims = typeItem["dims"]
-            if type(type_dims) not in (list, tuple):
-                msg = "unexpected type for type array dimensions"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            rank = len(type_dims)
-            baseType = typeItem["base"]
-            out = self.getDataValue(baseType, value, dimension=rank, dims=type_dims)
-
-        elif typeClass in ("H5T_INTEGER", "H5T_FLOAT", "H5T_ENUM"):
-            out = value  # just copy value
-        elif typeClass == "H5T_STRING":
-            if "charSet" in typeItem:
-                charSet = typeItem["charSet"]
-            else:
-                charSet = "H5T_CSET_ASCII"
-            if charSet == "H5T_CSET_ASCII" and isinstance(value, bytes):
-                out = value.decode("utf-8")
-            else:
-                out = value
-        else:
-            msg = "Unexpected type class: " + typeClass
-            self.log.info(msg)
-            raise IOError(errno.ENINVAL, msg)
-        return out
-
-    def getRefValue(self, typeItem: dict, value: list):
+    def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
         """
-        Return a numpy value based on json representation
+        create an attribute - will override any existing attributes
         """
-        out = None
-        typeClass = typeItem["class"]
-        if typeClass == "H5T_COMPOUND":
-            if not isinstance(value, (list, tuple)):
-                msg = f"Unexpected type for compound value: {type(value)}"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            fields = typeItem["fields"]
-            if len(fields) != len(value):
-                msg = "Number of elements in compound type does not match type"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            nFields = len(fields)
-            out = []
-            for i in range(nFields):
-                field = fields[i]
-                item_value = self.getRefValue(field["type"], value[i])
-                out.append(item_value)
-        elif typeClass == "H5T_VLEN":
-            if type(value) not in (list, tuple):
-                msg = "Unexpected type for vlen value"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            baseType = typeItem["base"]
-            out = []
-            nElements = len(value)
-            for i in range(nElements):
-                item_value = self.getRefValue(baseType, value[i])
-                out.append(item_value)
-        elif typeClass == "H5T_REFERENCE":
-            out = self.listToRef(value)
-        elif typeClass == "H5T_OPAQUE":
-            out = "???"  # todo
-        elif typeClass == "H5T_ARRAY":
-            out = self.toRef(len(typeItem["dims"]), typeItem["base"], value)
-        elif typeClass in ("H5T_INTEGER", "H5T_FLOAT", "H5T_ENUM"):
-            out = value  # just copy value
-        elif typeClass == "H5T_STRING":
-            if typeItem["charSet"] == "H5T_CSET_UTF8":
-                # out = value.encode('utf-8')
-                out = value
-            else:
-                out = value.encode()
-        else:
-            msg = "Unexpected type class: " + typeClass
-            self.log.info(msg)
-            raise IOError(errno.ENINVAL, msg)
 
-        if isinstance(out, list):
-            out = tuple(out)  # convert to tuple
-        return out
+        # TBD: if dtype is a committed ref type, fetch it first
+        # TBD: also, check special case for complex types
 
-    """
-      Return a numpy value based on json representation
-    """
+        if isinstance(dtype, str) and dtype.startswith("datatypes/"):
+            ctype_id = dtype[len("datatypes/"):]
+            if getCollectionForId(ctype_id) != "datatypes":
+                raise TypeError(f"unexpected dtype value for createAttribute: {dtype}")
+            if ctype_id not in self.db:
+                raise KeyError(f"ctype: {ctype_id} not found")
+            ctype_json = self.getObjectById(ctype_id)
+            type_json = ctype_json["type"].copy()
+            type_json["id"] = ctype_id
+            dtype = createDataType(type_json)
 
-    def toNumPyValue(self, typeItem, src, des):
-        typeClass = "H5T_INTEGER"  # default to int type
-        if type(typeItem) is dict:
-            typeClass = typeItem["class"]
-        if typeClass == "H5T_COMPOUND":
-            fields = typeItem["fields"]
-            if len(fields) != len(src):
-                msg = "Number of elements in compound type does not match type"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-            nFields = len(fields)
-
-            for i in range(nFields):
-                field = fields[i]
-                field_name = field["name"]
-                des[field_name] = src[i]
-
-        elif typeClass == "H5T_VLEN":
-            if type(src) not in (list, tuple):
-                msg = "Unexpected type for vlen value"
-                self.log.error(msg)
-                raise IOError(errno.EIO, msg)
-
-            baseType = typeItem["base"]
-
-            dt = self.createTypeFromItem(baseType)
-            des = np.array(src, dtype=dt)
-
-        elif typeClass == "H5T_REFERENCE":
-            des = src  # self.listToRef(src)
-
-        elif typeClass == "H5T_OPAQUE":
-            des = "???"  # todo
-        elif typeClass == "H5T_ARRAY":
-            des = src
-        elif typeClass in ("H5T_INTEGER", "H5T_FLOAT", "H5T_ENUM"):
-            des = src  # just copy value
-        elif typeClass == "H5T_STRING":
-            if typeItem["charSet"] == "H5T_CSET_UTF8":
-                des = src  # src.encode('utf-8')
+        # First, make sure we have a NumPy array
+        if isinstance(value, Reference) and dtype is None:
+            dtype = special_dtype(ref=Reference)
+        if shape == "H5S_NULL":
+            if value:
+                raise ValueError("Value can't be set for Null space attributes")
+            if dtype is None:
+                raise ValueError("Dtype must be set for Null space attributes")
             else:
-                if type(src) is str:
-                    try:
-                        src.encode("ascii")
-                    except UnicodeDecodeError:
-                        raise TypeError(
-                            "non-ascii value not allowed with H5T_CSET_ASCII"
-                        )
-                des = src
-
+                dtype = np.dtype(dtype)
         else:
-            msg = "Unexpected type class: " + typeClass
-            self.log.info(msg)
-            raise IOError(errno.ENINVAL, msg)
-        return des
-
-    """
-       copy src data to numpy array
-    """
-
-    def toNumPyArray(self, rank, typeItem, src, des):
-        if rank == 0:
-            msg = "unexpected rank value"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)  # shouldn't be called with rank 0
-
-        for i in range(len(des)):
-            des_sec = des[i]  # numpy slab
-
-            src_sec = src[i]
-
-            if rank > 1:
-                self.toNumPyArray(rank - 1, typeItem, src_sec, des_sec)
+            value = np.asarray(value, dtype=dtype, order='C')
+            if dtype is None:
+                dtype = value.dtype
             else:
-                rv = self.toNumPyValue(typeItem, src_sec, des_sec)
-                # if the numpy object is writeable, des_sec will be
-                # already updated.  Otherwise, update the des by assignment
-                if not hasattr(des_sec, "flags") or not des_sec.flags["WRITEABLE"]:
-                    des[i] = rv
-
-    def toRef(self, rank, typeItem, data):
-        """
-        Convert json list to h5py compatible values
-        """
-        out = None
-
-        if isinstance(typeItem, str):
-            # commited type - get json representation
-            committed_type_item = self.getCommittedTypeItemByUuid(typeItem)
-            typeItem = committed_type_item["type"]
+                dtype = np.dtype(dtype)  # In case a string, e.g. 'i8' is passed
 
-        typeClass = typeItem["class"]
-        if typeClass in ("H5T_INTEGER", "H5T_FLOAT"):
-            out = data  # just use as is
+        # Where a top-level array type is requested, we have to do some
+        # fiddling around to present the data as a smaller array of
+        # sub-arrays.
+        if value is not None:
+            if dtype.subdtype is not None:
+                subdtype, subshape = dtype.subdtype
 
-        elif rank == 0:
-            # scalar value
-            out = self.getRefValue(typeItem, data)
-        else:
-            out = []
-            for item in data:
-                if rank > 1:
-                    out_item = self.toRef(rank - 1, typeItem, item)
-                    out.append(out_item)
-                else:
-                    out_item = self.getRefValue(typeItem, item)
-                    out.append(out_item)
+                # Make sure the subshape matches the last N axes' sizes.
+                if shape[-len(subshape):] != subshape:
+                    raise ValueError(f"Array dtype shape {subshape} is incompatible with data shape {shape}")
 
-        return out
+                # New "advertised" shape and dtype
+                shape = shape[0:len(shape) - len(subshape)]
+                dtype = subdtype
 
-    """
-       Convert list to json serializable values.
-    """
+            # Not an array type; make sure to check the number of elements
+            # is compatible, and reshape if needed.
+            else:
+                if isinstance(shape, tuple):
+                    if np.prod(shape) != np.prod(value.shape):
+                        raise ValueError("Shape of new attribute conflicts with shape of data")
 
-    def toList(self, rank, typeItem, data):
-        out = None
-        typeClass = typeItem["class"]
-        if typeClass in ("H5T_INTEGER", "H5T_FLOAT"):
-            out = data.tolist()  # just use as is
+                    if shape != value.shape:
+                        value = value.reshape(shape)
 
-        elif rank == 0:
-            # scalar value
-            out = self.getDataValue(typeItem, data)
-        else:
-            out = []
-            for item in data:
-                if rank > 1:
-                    out_item = self.toList(rank - 1, typeItem, item)
-                    out.append(out_item)
-                else:
-                    out_item = self.getDataValue(typeItem, item)
-                    out.append(out_item)
+                # We need this to handle special string types.
+                value = np.asarray(value, dtype=dtype)
 
-        return out
+            value_json = bytesArrayToList(value)
 
-    """
-       Create ascii representation of vlen data object
-    """
-
-    def vlenToList(self, data):
-        # todo - verify that data is a numpy.ndarray
-        out = None
-        if len(data.shape) == 0:
-            out = []
         else:
-            try:
-                if data.dtype.kind != "O":
-                    out = data.tolist()
-                else:
-                    out = []
-                    for item in data:
-                        out.append(self.vlenToList(item))  # recursive call
-            except AttributeError:
-                # looks like this is not a numpy ndarray, just return the value
-                out = data
-        return out
-
-    """
-       Create ascii representation of ref data object
-    """
+            value_json = None
 
-    def refToList(self, data):
-        # todo - verify that data is a numpy.ndarray
-        out = None
-        if type(data) is h5py.h5r.Reference:
-            if bool(data):
-                grpref = self.f[data]
-                addr = h5py.h5o.get_info(grpref.id).addr
-                uuid = self.getUUIDByAddress(addr)
-                if self.getGroupObjByUuid(uuid):
-                    out = "groups/" + uuid
-                elif self.getDatasetObjByUuid(uuid):
-                    out = "datasets/" + uuid
-                elif self.getCommittedTypeObjByUuid(uuid):
-                    out = "datatypes/" + uuid
-                else:
-                    self.log.warning("uuid in region ref not found: [" + uuid + "]")
-                    return None
-            else:
-                out = "null"
-        elif type(data) is h5py.h5r.RegionReference:
-            out = self.getRegionReference(data)
+        if shape is None:
+            shape = value.shape
+        if shape == "H5S_NULL":
+            shape_json = {"class": "H5S_NULL"}
+        elif len(shape) == 0:
+            shape_json = {"class": "H5S_SCALAR"}
         else:
-            out = []
-            for item in data:
-                out.append(self.refToList(item))  # recursive call
-        return out
+            shape_json = {"class": "H5S_SIMPLE"}
+            shape_json["dims"] = list(shape)
 
-    """
-       Convert ascii representation of data references to data ref
-    """
-
-    def listToRef(self, data):
-        out = None
-        if not data:
-            # null reference
-            out = self.getNullReference()
-        elif isinstance(data, (bytes, str)):
-            obj_ref = None
-            # object reference should be in the form: <collection_name>/<uuid>
-            for prefix in ("datasets", "groups", "datatypes"):
-                if data.startswith(prefix):
-                    uuid_ref = data[len(prefix) :]
-                    if len(uuid_ref) == (UUID_LEN + 1) and uuid_ref.startswith("/"):
-                        obj = self.getObjectByUuid(prefix, uuid_ref[1:])
-                        if obj:
-                            obj_ref = obj.ref
-                        else:
-                            msg = (
-                                "Invalid object reference value: ["
-                                + uuid_ref
-                                + "] not found"
-                            )
-                            self.log.info(msg)
-                            raise IOError(errno.ENXIO, msg)
-                    break
-            if not obj_ref:
-                msg = "Invalid object reference value: [" + data + "]"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            else:
-                out = obj_ref
-
-        elif isinstance(data, (list, tuple)):
-            out = []
-            for item in data:
-                out.append(self.listToRef(item))  # recursive call
-        elif isinstance(data, dict):
-            # assume region ref
-            out = self.createRegionReference(data)
-        else:
-            msg = "Invalid object reference value type: [" + str(type(data)) + "]"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        return out
+        obj_json = self.getObjectById(obj_id)
+        attrs_json = obj_json["attributes"]
+        type_json = getTypeItem(dtype)
+        # finally put it all together...
+        attr_json = {"shape": shape_json, "type": type_json, "value": value_json}
+        attr_json["created"] = time.time()
 
-    def bytesArrayToList(self, data):
-        """
-        Convert list that may contain bytes type elements to list of string elements
-        """
-        if isinstance(data, (bytes, str)):
-            is_list = False
-        elif isinstance(data, (np.ndarray, np.generic)):
-            if len(data.shape) == 0:
-                is_list = False
-                data = data.tolist()  # tolist will return a scalar in this case
-                if isinstance(data, (list, tuple)):
-                    is_list = True
-                else:
-                    is_list = False
-            else:
-                is_list = True
-        elif isinstance(data, (list, tuple)):
-            is_list = True
-        else:
-            is_list = False
-
-        if is_list:
-            out = []
-            for item in data:
-                out.append(self.bytesArrayToList(item))  # recursive call
-        elif isinstance(data, bytes):
-            out = data.decode("utf-8")
-        else:
-            out = data
+        # slot into the obj_json["attrs"]
+        attrs_json[name] = attr_json
 
-        return out
+        # mark object as dirty
+        self.make_dirty(obj_id)
 
-    def getRegionReference(self, regionRef):
-        """
-        Get item description of region reference value
-        """
-        selectionEnums = {
-            h5py.h5s.SEL_NONE: "H5S_SEL_NONE",
-            h5py.h5s.SEL_ALL: "H5S_SEL_ALL",
-            h5py.h5s.SEL_POINTS: "H5S_SEL_POINTS",
-            h5py.h5s.SEL_HYPERSLABS: "H5S_SEL_HYPERSLABS",
-        }
-
-        item = {}
-        objid = h5py.h5r.dereference(regionRef, self.f.file.file.id)
-        if objid:
-            item["id"] = self.getUUIDByAddress(h5py.h5o.get_info(objid).addr)
-        else:
-            self.log.info("region reference unable to find item with objid: " + objid)
-            return item
-
-        sel = h5py.h5r.get_region(regionRef, objid)
-        select_type = sel.get_select_type()
-        if select_type not in selectionEnums:
-            msg = "Unexpected selection type: " + regionRef.typecode
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        item["select_type"] = selectionEnums[select_type]
-        pointlist = None
-        if select_type == h5py.h5s.SEL_POINTS:
-            # retrieve a numpy array of selection points
-            points = sel.get_select_elem_pointlist()
-            pointlist = points.tolist()
-        elif select_type == h5py.h5s.SEL_HYPERSLABS:
-            points = sel.get_select_hyper_blocklist()
-            if points is not None:
-                pointlist = points[...].tolist()
-                # bump up the second coordinate by one to match api spec
-                for point in pointlist:
-                    coord2 = point[1]
-                    for i in range(len(coord2)):
-                        coord2[i] = coord2[i] + 1
-
-        item["selection"] = pointlist
-
-        return item
-
-    def createRegionReference(self, item):
-        """
-        Create region reference from item description of region reference value
-        """
-        selectionEnums = {
-            "H5S_SEL_NONE": h5py.h5s.SEL_NONE,
-            "H5S_SEL_ALL": h5py.h5s.SEL_ALL,
-            "H5S_SEL_POINTS": h5py.h5s.SEL_POINTS,
-            "H5S_SEL_HYPERSLABS": h5py.h5s.SEL_HYPERSLABS,
-        }
-        region_ref = None
-
-        if "select_type" not in item:
-            msg = "select_type not provided for region selection"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        select_type = item["select_type"]
-        if select_type not in selectionEnums.keys():
-            msg = "selection type: [" + select_type + "] is not valid"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        dset = None
-        if select_type == "H5S_SEL_NONE":
-            if "id" not in item:
-                #        select none on null dataset, return null ref
-                out = self.getNullReference()
-                return out
-        else:  # select_type != 'H5S_SEL_NONE'
-            if "id" not in item:
-                msg = "id not provided for region selection"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-        # Otherwise need to provide uuid of dataset
-        uuid_ref = item["id"]
-        if len(uuid_ref) != UUID_LEN:
-            msg = "uuid value: [" + uuid_ref + "] for region reference is not valid"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
+    def deleteAttribute(self, obj_id, name):
+        """ delete the given attribute """
+        obj_json = self.getObjectById(obj_id)
+        attrs_json = obj_json["attributes"]
+        if name not in attrs_json:
+            raise KeyError(f"attribute [{name}] not found in {obj_id}")
+        attrs_json[name] = None  # mark key for deletion
 
-        obj = self.getObjectByUuid("datasets", uuid_ref)
-        if obj:
-            dset = obj
-        else:
-            msg = "Invalid region refence value: [" + uuid_ref + "] not found"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if select_type in ("H5S_SEL_POINTS", "H5S_SEL_HYPERSLABS"):
-            if "selection" not in item:
-                msg = "selection key not provided for region selection"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-        rank = len(dset.shape)
-        space_id = h5py.h5d.DatasetID.get_space(dset.id)
-        h5py.h5s.SpaceID.select_none(space_id)
-
-        if select_type == "H4S_SEL_NONE":
-            pass  # did select_none above
-        elif select_type == "H5S_SEL_ALL":
-            h5py.h5s.SpaceID.select_all(space_id)
-        elif select_type == "H5S_SEL_POINTS":
-            selection = item["selection"]
-            for point in selection:
-                if len(point) != rank:
-                    msg = "point selection number of elements must mach rank of referenced dataset"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-            h5py.h5s.SpaceID.select_elements(space_id, selection)
-        elif select_type == "H5S_SEL_HYPERSLABS":
-            selection = item["selection"]
-
-            for slab in selection:
-                # each item should be a two element array defining the hyperslab boundary
-                if len(slab) != 2:
-                    msg = "selection value not valid (not a 2 element array)"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-                start = slab[0]
-                if isinstance(start, list):
-                    start = tuple(start)
-                if type(start) is not tuple or len(start) != rank:
-                    msg = "selection value not valid, start element should have number "
-                    msg += "elements equal to rank of referenced dataset"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-                stop = slab[1]
-                if isinstance(stop, list):
-                    stop = tuple(stop)
-                if type(stop) is not tuple or len(stop) != rank:
-                    msg = "selection value not valid, count element should have number "
-                    msg += "elements equal to rank of referenced dataset"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-                count = []
-                for i in range(rank):
-                    if start[i] < 0:
-                        msg = "start value for hyperslab selection must be non-negative"
-                        self.log.info(msg)
-                        raise IOError(errno.EINVAL, msg)
-                    if stop[i] <= start[i]:
-                        msg = "stop value must be greater than start value for hyperslab selection"
-                        self.log.info(msg)
-                        raise IOError(errno.EINVAL, msg)
-                    count.append(stop[i] - start[i])
-                count = tuple(count)
-
-                h5py.h5s.SpaceID.select_hyperslab(
-                    space_id, start, count, op=h5py.h5s.SELECT_OR
-                )
-
-        # now that we've selected the desired region in the space, return a region reference
-        dset_name = dset.name.encode("utf-8")
-        region_ref = h5py.h5r.create(
-            self.f.id, dset_name, h5py.h5r.DATASET_REGION, space_id
-        )
-
-        return region_ref
-
-    def toTuple(self, rank, data):
-        """
-        Convert a list to a tuple, recursively.
-        Example. [[1,2],[3,4]] -> ((1,2),(3,4))
-        """
-        if isinstance(data, (list, tuple)):
-            if rank > 0:
-                return list(self.toTuple(rank - 1, x) for x in data)
-            else:
-                return tuple(self.toTuple(rank - 1, x) for x in data)
-        else:
-            return data
+        self.make_dirty(obj_id)
 
-    def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"):
+    def getDatasetValues(self, dset_id, sel):
         """
-        Get values from dataset identified by obj_uuid.
+        Get values from dataset identified by obj_id.
         If a slices list or tuple is provided, it should have the same
         number of elements as the rank of the dataset.
         """
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        if format not in ("json", "binary"):
-            msg = "only json and binary formats are supported"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        values = None
-        dt = dset.dtype
-        typeItem = getTypeItem(dt)
-        itemSize = getItemSize(typeItem)
-        if itemSize == "H5T_VARIABLE" and format == "binary":
-            msg = "Only JSON is supported for for this data type"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if dset.shape is None:
-            # null space dataset (with h5py 2.6.0)
-            return None
-
-        rank = len(dset.shape)
-
-        if rank == 0:
-            # check for null dataspace
-            try:
-                val = dset[...]
-            except IOError:
-                # assume null dataspace, return none
-                return None
-            if val is None:
-                self.log.warning("no value returned from scalar dataset")
-
-        if not isinstance(slices, (list, tuple)) and slices is not Ellipsis:
-            msg = "Unexpected error: getDatasetValuesByUuid: bad type for dim parameter"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        if isinstance(slices, (list, tuple)) and len(slices) != rank:
-            msg = "Unexpected error: getDatasetValuesByUuid: number of dims in selection not same as rank"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        if dt.kind == "O":
-            if format != "json":
-                msg = "Only JSON is supported for for this data type"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            # numpy object type - could be a vlen string or generic vlen
-            h5t_check = h5py.h5t.check_dtype(vlen=dt)
-            if h5t_check is str or h5t_check is bytes:
-                values = self.bytesArrayToList(dset[slices])
-            elif h5t_check is not None:
-                # other vlen data
-                values = self.vlenToList(dset[slices])
-            else:
-                # check for reference type
-                h5t_check = h5py.h5t.check_dtype(ref=dt)
-                if h5t_check is not None:
-                    # reference type
-                    values = self.refToList(dset[slices])
-                else:
-                    msg = "Unexpected error, object type unknown"
-                    self.log.error(msg)
-                    raise IOError(errno.EIO, msg)
-        elif dt.kind == "V" and len(dt) <= 1 and len(dt.shape) == 0 and not dt.names:
-            # opaque type - skip for now
-            self.log.warning("unable to get opaque type values")
-            values = "????"
-        elif dt.kind == "S" and format == "json":
-            values = self.bytesArrayToList(dset[slices])
-        elif len(dt) > 1 or dt.names:
-            # compound type
-            if format == "json":
-                values = self.bytesArrayToList(dset[slices])
-            else:
-                values = dset[slices].tobytes()
-        else:
-            values = dset[slices]
-
-            # just use tolist to dump
-            if format == "json":
-                values = values.tolist()
-            else:
-                # values = base64.b64encode(dset[slices].tobytes())
-                values = values.tobytes()
+        self.log.info(f"getDatasetValues dset_id: {dset_id}, sel: {sel}")
+        dset_json = self.getObjectById(dset_id)
+        shape_json = dset_json["shape"]
+        if not isinstance(sel, selections.Selection):
+            raise TypeError("Expected Selection class")
 
-        return values
-
-    """
-      doDatasetQueryByUuid: return rows based on query string
-        Return rows from a dataset that matches query string.
-
-        Note: Only supported for compound_type/one-dimensional datasets
-    """
-
-    def doDatasetQueryByUuid(
-        self, obj_uuid, query, start=0, stop=-1, step=1, limit=None
-    ):
-        self.log.info("doQueryByUuid - uuid: " + obj_uuid + " query:" + query)
-        self.log.info(
-            "start: "
-            + str(start)
-            + " stop: "
-            + str(stop)
-            + " step: "
-            + str(step)
-            + " limit: "
-            + str(limit)
-        )
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        values = []
-        dt = dset.dtype
-        typeItem = getTypeItem(dt)
-        # itemSize = getItemSize(typeItem)
-        if typeItem["class"] != "H5T_COMPOUND":
-            msg = "Only compound type datasets can be used as query target"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if dset.shape is None:
-            # null space dataset (with h5py 2.6.0)
+        if shape_json["class"] == "H5S_NULL":
             return None
 
-        rank = len(dset.shape)
-        if rank != 1:
-            msg = "One one-dimensional datasets can be used as query target"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        values = []
-        indexes = []
-        count = 0
-
-        num_elements = dset.shape[0]
-        if stop == -1:
-            stop = num_elements
-        elif stop > num_elements:
-            stop = num_elements
-        block_size = self._getBlockSize(dset)
-        self.log.info("block_size: " + str(block_size))
-
-        field_names = list(dset.dtype.fields.keys())
-        eval_str = self._getEvalStr(query, field_names)
-
-        while start < stop:
-            if limit and (count == limit):
-                break  # no more rows for this batch
-            end = start + block_size
-            if end > stop:
-                end = stop
-            rows = dset[start:end]  # read from dataset
-            where_result = np.where(eval(eval_str))
-            index = where_result[0].tolist()
-            if len(index) > 0:
-                for i in index:
-                    row = rows[i]
-                    item = self.bytesArrayToList(row)
-                    values.append(item)
-                    indexes.append(start + i)
-                    count += 1
-                    if limit and (count == limit):
-                        break  # no more rows for this batch
-
-            start = end  # go to next block
-
-        # values = self.getDataValue(item_type, values, dimension=1, dims=(len(values),))
-
-        self.log.info("got " + str(count) + " query matches")
-        return (indexes, values)
-
-    """
-     _getBlockSize: Get number of rows to read from disk
-
-        heurestic to get reasonable sized chunk of data to fetch.
-        make multiple of chunk_size if possible
-    """
-
-    def _getBlockSize(self, dset):
-        target_block_size = 256 * 1000
-        if dset.chunks:
-            chunk_size = dset.chunks[0]
-            if chunk_size < target_block_size:
-                block_size = (target_block_size // chunk_size) * chunk_size
-            else:
-                block_size = target_block_size
-        else:
-            block_size = target_block_size
-        return block_size
-
-    """
-     _getEvalStr: Get eval string for given query
-
-        Gets Eval string to use with numpy where method.
-    """
-
-    def _getEvalStr(self, query, field_names):
-        i = 0
-        eval_str = ""
-        var_name = None
-        end_quote_char = None
-        var_count = 0
-        paren_count = 0
-        black_list = ("import",)  # field names that are not allowed
-        self.log.info("getEvalStr(" + query + ")")
-        for item in black_list:
-            if item in field_names:
-                msg = "invalid field name"
-                self.log.info("EINVAL: " + msg)
-                raise IOError(errno.EINVAL, msg)
-        while i < len(query):
-            ch = query[i]
-            if (i + 1) < len(query):
-                ch_next = query[i + 1]
-            else:
-                ch_next = None
-            if var_name and not ch.isalnum():
-                # end of variable
-                if var_name not in field_names:
-                    # invalid
-                    msg = "unknown field name"
-                    self.log.info("EINVAL: " + msg)
-                    raise IOError(errno.EINVAL, msg)
-                eval_str += "rows['" + var_name + "']"
-                var_name = None
-                var_count += 1
-
-            if end_quote_char:
-                if ch == end_quote_char:
-                    # end of literal
-                    end_quote_char = None
-                eval_str += ch
-            elif ch in ("'", '"'):
-                end_quote_char = ch
-                eval_str += ch
-            elif ch.isalpha():
-                if ch == "b" and ch_next in ("'", '"'):
-                    eval_str += "b"  # start of a byte string literal
-                elif var_name is None:
-                    var_name = ch  # start of a variable
-                else:
-                    var_name += ch
-            elif ch == "(" and end_quote_char is None:
-                paren_count += 1
-                eval_str += ch
-            elif ch == ")" and end_quote_char is None:
-                paren_count -= 1
-                if paren_count < 0:
-                    msg = "Mismatched paren"
-                    self.log.info("EINVAL: " + msg)
-                    raise IOError(errno.EINVAL, msg)
-                eval_str += ch
-            else:
-                # just add to eval_str
-                eval_str += ch
-            i = i + 1
-        if end_quote_char:
-            msg = "no matching quote character"
-            self.log.info("EINVAL: " + msg)
-            raise IOError(errno.EINVAL, msg)
-        if var_count == 0:
-            msg = "No field value"
-            self.log.info("EINVAL: " + msg)
-            raise IOError(errno.EINVAL, msg)
-        if paren_count != 0:
-            msg = "Mismatched paren"
-            self.log.info("EINVAL: " + msg)
-            raise IOError(errno.EINVAL, msg)
-
-        return eval_str
-
-    """
-    Get values from dataset identified by obj_uuid using the given
-    point selection.
-    """
-
-    def getDatasetPointSelectionByUuid(self, obj_uuid, points):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        rank = len(dset.shape)
-        values = np.zeros(len(points), dtype=dset.dtype)
-        try:
-            i = 0
-            for point in points:
-                if rank == 1:
-                    values[i] = dset[[point]]
-                else:
-                    values[i] = dset[tuple(point)]
-                i += 1
-        except ValueError:
-            # out of range error
-            msg = "getDatasetPointSelection, out of range error"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        return values.tolist()
-
-    """
-    setDatasetValuesByUuid - update the given dataset values with supplied data
-      and optionally a hyperslab selection (slices)
-    """
-
-    def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-
-        if format not in ("json", "binary"):
-            msg = "only json and binary formats are supported"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if format == "binary" and type(data) is not bytes:
-            msg = "data must be of type bytes for binary writing"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
+        if shape_json["class"] == "H5S_SCALAR":
+            if sel.select_type != selections.H5S_SELECT_ALL:
+                # TBD: support other selection types
+                raise ValueError("Only SELECT_ALL selections are supported for scalar datasets")
+            if sel.shape != ():
+                raise ValueError("Selection shape does not match dataset shape")
+            rank = 0
+        else:
+            dims = tuple(shape_json["dims"])
+            if sel.shape != dims:
+                raise ValueError("Selection shape does not match dataset shape")
+            rank = len(dims)
 
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        dt = dset.dtype
-        typeItem = getTypeItem(dt)
-        itemSize = getItemSize(typeItem)
-        rank = len(dset.shape)
-        arraySize = 1
-        for extent in dset.shape:
-            arraySize *= arraySize
-
-        if itemSize == "H5T_VARIABLE" and format == "binary":
-            msg = "Only JSON is supported for for this data type"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if slices is None:
-            slices = []
-            # create selection that covers entire dataset
-            for dim in range(rank):
-                s = slice(0, dset.shape[dim], 1)
-                slices.append(s)
-            slices = tuple(slices)
-
-        if not isinstance(slices, tuple):
-            msg = "setDatasetValuesByUuid: bad type for dim parameter"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        if len(slices) != rank:
-            msg = "number of dims in selection not same as rank"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        npoints = 1
-        np_shape = []
-        for i in range(rank):
-            s = slices[i]
-
-            if s.start < 0 or s.step <= 0 or s.stop < s.start:
-                msg = "invalid slice specification"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            if s.stop > dset.shape[i]:
-                msg = "invalid slice specification"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            np_shape.append(s.stop - s.start)
-
-            count = (s.stop - s.start) // s.step
-            if count <= 0:
-                msg = "invalid slice specification"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-            npoints *= count
-
-        np_shape = tuple(np_shape)  # for comparison with ndarray shape
-
-        self.log.info("selection shape:" + str(np_shape))
-
-        # need some special conversion for compound types --
-        # each element must be a tuple, but the JSON decoder
-        # gives us a list instead.
-        if format != "binary" and dset.dtype.names and isinstance(data, (list, tuple)):
-            data = self.toTuple(rank, data)
-            # for i in range(len(data)):
-            #    converted_data.append(self.toTuple(data[i]))
-            # data = converted_data
-        else:
-            h5t_check = h5py.check_dtype(ref=dset.dtype)
-            if h5t_check in (h5py.Reference, h5py.RegionReference):
-                # convert data to data refs
-                if format == "binary":
-                    msg = "Only JSON is supported for for this data type"
-                    self.log.info(msg)
-                    raise IOError(errno.EINVAL, msg)
-                data = self.listToRef(data)
-
-        if format == "binary":
-            if npoints * itemSize != len(data):
-                msg = (
-                    "Expected: "
-                    + str(npoints * itemSize)
-                    + " bytes, but got: "
-                    + str(len(data))
-                )
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            if dset.dtype.shape == ():
-                arr = np.fromstring(data, dtype=dset.dtype)
-                arr = arr.reshape(np_shape)  # conform to selection shape
-            else:
-                # tricy array type!
-                arr = np.empty(np_shape, dtype=dset.dtype)
-                base_arr = np.fromstring(data, dtype=dset.dtype.base)
-                base_shape = list(np_shape)
-                base_shape.extend(dset.dtype.shape)  # add on the type dimensions
-                base_arr = base_arr.reshape(base_shape)
-                arr[...] = base_arr
-        else:
-            # data is json
-            if npoints == 1 and len(dset.dtype) > 1:
-                # convert to tuple for compound singleton writes
-                data = [
-                    tuple(data),
-                ]
-
-            arr = np.array(data, dtype=dset.dtype)
-            # raise an exception of the array shape doesn't match the selection shape
-            # allow if the array is a scalar and the selection shape is one element,
-            # numpy is ok with this
-            np_index = 0
-            for dim in range(len(arr.shape)):
-                data_extent = arr.shape[dim]
-                selection_extent = 1
-                if np_index < len(np_shape):
-                    selection_extent = np_shape[np_index]
-                if selection_extent == data_extent:
-                    np_index += 1
-                    continue  # good
-                if data_extent == 1:
-                    continue  # skip singleton selection
-                if selection_extent == 1:
-                    np_index += 1
-                    continue  # skip singleton selection
-
-                # selection/data mismatch!
-                msg = "data shape doesn't match selection shape"
-                msg += "--data shape: " + str(arr.shape)
-                msg += "--selection shape: " + str(np_shape)
-
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-        # write temp numpy array to dataset
-        if rank == 1:
-            s = slices[0]
-            try:
-                dset[s] = arr
-            except TypeError as te:
-                self.log.info("h5py setitem exception: " + str(te))
-                raise IOError(errno.EINVAL, str(te))
+        dtype = self.getDtype(dset_json)
+        if self.reader:
+            arr = self.reader.getDatasetValues(dset_id, sel, dtype=dtype)
         else:
-            try:
-                dset[slices] = arr
-            except TypeError as te:
-                self.log.info("h5py setitem exception: " + str(te))
-                raise IOError(errno.EINVAL, str(te))
+            # TBD: Initialize with fill value if non-zero
+            arr = np.zeros(sel.shape, dtype=dtype)
 
-        # update modified time
-        self.setModifiedTime(obj_uuid)
-        return True
-
-    """
-    setDatasetValuesByPointSelection - Update the dataset values using the given
-      data and point selection
-    """
-
-    def setDatasetValuesByPointSelection(self, obj_uuid, data, points, format="json"):
-        dset = self.getDatasetObjByUuid(obj_uuid)
-
-        if format not in ("json", "binary"):
-            msg = "only json and binary formats are supported"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if format == "binary" and type(data) is not bytes:
-            msg = "data must be of type bytes for binary writing"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        if dset is None:
-            msg = "Dataset: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
+        if "updates" in dset_json:
+            # apply any non-flushed changes that intersect the current selection
+            updates = dset_json["updates"]
+            for (update_sel, update_val) in updates:
+                sel_inter = selections.intersect(sel, update_sel)
+                if sel_inter.nselect == 0:
+                    continue
+                # update portion of arr, that intersects update_val
+                slices = []
+                for dim in range(rank):
+                    start = sel_inter.start[dim] - sel.start[dim]
+                    stop = start + sel_inter.count[dim]
+                    slices.append(slice(start, stop, 1))
+                slices = tuple(slices)
+                arr[slices] = update_val
+
+        return arr
+
+    def setDatasetValues(self, dset_id, sel, arr):
+        """
+        Write the given ndarray to the dataset using the selection
+        """
+        dset_json = self.getObjectById(dset_id)
+        shape_json = dset_json["shape"]
+        if not isinstance(sel, selections.Selection):
+            raise TypeError("Expected Selection class")
+        if sel.select_type not in (selections.H5S_SELECT_HYPERSLABS, selections.H5S_SELECT_ALL):
+            # TBD: support other selection types
+            raise ValueError("Only hyperslab selections are currently supported")
+        if not isinstance(arr, np.ndarray):
+            raise TypeError("Expected ndarray for data value")
+        if shape_json["class"] == "H5S_NULL":
+            raise ValueError("writing to null space dataset not supported")
+        if shape_json["class"] == "H5S_SCALAR":
+            if sel.shape != ():
+                raise ValueError("Selection shape does not match dataset shape")
+            if len(arr.shape) > 0:
+                raise TypeError("Expected scalar ndarray for scalar dataset")
+        else:
+            dims = tuple(shape_json["dims"])
+            if sel.shape != dims:
+                raise ValueError("Selection shape does not match dataset shape")
+        if "updates" not in dset_json or sel.select_type == selections.H5S_SELECT_ALL:
+            # for select all, throw out any existing updates since this will overwrite them
+            dset_json["updates"] = []
+        updates = dset_json["updates"]
+        updates.append((sel, arr.copy()))
+        self.make_dirty(dset_id)
+
+    def resizeDataset(self, dset_id, shape):
+        """
+        Resize existing Dataset
+        """
+        self.log.info(f"resizeDataset {dset_id}, {shape}")
+
+        dset_json = self.getObjectById(dset_id)  # will throw exception if not found
+        if resize_dataset(dset_json, shape):
+            self._dirty_objects.add(dset_id)
+
+    def deleteObject(self, obj_id):
+        """ Delete the given object """
+        self.log.info(f"deleteObject: {obj_id}")
+        if obj_id not in self.db:
+            raise KeyError(f"Object {obj_id} not found for deletion")
+        if obj_id == self.root_id:
+            raise KeyError("Root group cannot be deleted")
+        self.db[obj_id] = None
+
+        if obj_id in self._new_objects:
+            self._new_objects.remove(obj_id)
+
+        if obj_id in self._dirty_objects:
+            self._dirty_objects.remove(obj_id)
+
+        self._deleted_objects.add(obj_id)
+
+    def getLinks(self, grp_id):
+        """ Get the links for the given group """
+        grp_json = self.getObjectById(grp_id)
+        if "links" not in grp_json:
+            raise KeyError(f"No links - {grp_id} not a group?")
+        links = grp_json["links"]
+        names = []
+        for name in links:
+            if links[name] is not None:
+                names.append(name)
+        return names
+
+    def getLink(self, grp_id, name):
+        """ Get the given link """
+
+        obj_json = self.getObjectById(grp_id)
+        links = obj_json["links"]
+        if name not in links:
+            self.log.info(f"Link [{name}] not found in {grp_id}")
+            return None
+        if links[name] is None:
+            self.log.info(f"Link {name} in {grp_id} has been deleted")
+            return None
 
-        dt = dset.dtype
-        typeItem = getTypeItem(dt)
-        itemSize = getItemSize(typeItem)
-        if itemSize == "H5T_VARIABLE" and format == "binary":
-            msg = "Only JSON is supported for for this data type"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        rank = len(dset.shape)
-
-        # need some special conversion for compound types --
-        # each element must be a tuple, but the JSON decoder
-        # gives us a list instead.
-        if format == "json" and len(dset.dtype) > 1 and type(data) in (list, tuple):
-            raise NotImplementedError("need some special conversion for compound types")
-            # converted_data = self.toTuple(rank, data)
-            # for i in range(len(data)):
-            #    converted_data.append(self.toTuple(data[i]))
-            # data = converted_data
-
-        if format == "json":
-            try:
-                i = 0
-                for point in points:
-                    if rank == 1:
-                        dset[[point]] = data[i]
-                    else:
-                        dset[tuple(point)] = data[i]
-                    i += 1
-            except ValueError:
-                # out of range error
-                msg = "setDatasetValuesByPointSelection, out of range error"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
+        return links[name]
+
+    def _addLink(self, grp_id, name, link_json):
+        obj_json = self.getObjectById(grp_id)
+        links = obj_json["links"]
+        links[name] = link_json
+        self.make_dirty(grp_id)
+
+    def createHardLink(self, grp_id, name, tgt_id):
+        """ Create a new hardlink """
+        link_json = {"class": "H5L_TYPE_HARD", "id": tgt_id}
+        link_json["created"] = time.time()
+        self._addLink(grp_id, name, link_json)
+
+    def createSoftLink(self, grp_id, name, h5path):
+        """ Create a soft link """
+        link_json = {"class": "H5L_TYPE_SOFT", "h5path": h5path}
+        link_json["created"] = time.time()
+        self._addLink(grp_id, name, link_json)
+
+    def createCustomLink(self, grp_id, name, link_json):
+        """ create a custom link """
+        if link_json.get("class") != "H5L_TYPE_USER_DEFINED":
+            link_json["class"] = "H5L_TYPE_USER_DEFINED"
+        link_json["created"] = time.time()
+        self._addLink(grp_id, name, link_json)
+
+    def createExternalLink(self, grp_id, name, h5path, filepath):
+        """ Create a external link link """
+        link_json = {"class": "H5L_TYPE_EXTERNAL", "h5path": h5path, "file": filepath}
+        link_json["created"] = time.time()
+        self._addLink(grp_id, name, link_json)
+
+    def deleteLink(self, grp_id, name):
+        """ Delete the given link """
+        grp_json = self.getObjectById(grp_id)
+        if "links" not in grp_json:
+            raise KeyError(f"No links - {grp_id} not a group?")
+        links = grp_json["links"]
+        if name not in links:
+            raise KeyError(f"Link [{name}] not found in {grp_id}")
+        links[name] = None  # mark for deletion
+        self.make_dirty(grp_id)
+
+    def createGroup(self, cpl=None):
+        """ Create a new group """
+        if self.closed:
+            raise ValueError("db is closed")
+        grp_id = createObjId("groups", root_id=self.root_id)
+        group_json = {"attributes": {}, "links": {}}
+        if cpl:
+            group_json["cpl"] = cpl
+        else:
+            group_json["cpl"] = {}
+        group_json["created"] = time.time()
+        self.db[grp_id] = group_json
+        self._new_objects.add(grp_id)
+        return grp_id
+
+    def createCommittedType(self, datatype, cpl=None):
+        """
+        createCommittedType - creates new named datatype
+        Returns item
+        """
+        if self.closed:
+            raise ValueError("db is closed")
+        self.log.info("createCommittedType")
+        if cpl is None:
+            cpl = {}
 
+        ctype_id = createObjId(obj_type="datatypes", root_id=self.root_id)
+        if isinstance(datatype, np.dtype):
+            dt = datatype
         else:
-            # binary
-            arr = np.fromstring(data, dtype=dset.dtype)
-            dset[points] = arr  # coordinate write
+            dt = createDataType(datatype)
 
-        # update modified time
-        self.setModifiedTime(obj_uuid)
-        return True
+        type_json = getTypeItem(dt)  # get canonical json description of datatype
 
-    """
-    createDataset - creates new dataset given shape and datatype
-    Returns item
-    """
+        ctype_json = {"type": type_json, "attributes": {}, "cpl": cpl}
+        ctype_json["created"] = time.time()
+        self.db[ctype_id] = ctype_json
+        self._new_objects.add(ctype_id)
+        return ctype_id
 
     def createDataset(
-        self, datatype, datashape, max_shape=None, creation_props=None, obj_uuid=None
+        self,
+        shape=None,
+        maxdims=None,
+        dtype=None,
+        cpl=None,
     ):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create dataset (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        datasets = self.dbGrp["{datasets}"]
-        if not obj_uuid:
-            obj_uuid = str(uuid.uuid1())
-        dt = None
-        item = {}
-        fillvalue = None
-
-        # h5py.createdataset fields
-        kwargs = {}  # key word arguments for h5py dataset creation
-
-        if creation_props is None:
-            creation_props = {}  # create empty list for convience
-
-        if creation_props:
-            if "fillValue" in creation_props:
-                fillvalue = creation_props["fillValue"]
-            if "trackTimes" in creation_props:
-                kwargs["track_times"] = creation_props["trackTimes"]
-            if "layout" in creation_props:
-                layout = creation_props["layout"]
-                if "dims" in layout:
-                    kwargs["chunks"] = tuple(layout["dims"])
-            if "filters" in creation_props:
-                filter_props = creation_props["filters"]
-                for filter_prop in filter_props:
-                    if "id" not in filter_prop:
-                        msg = "filter id not provided"
-                        self.log.info(msg)
-                        raise IOError(errno.EINVAL, msg)
-                    filter_id = filter_prop["id"]
-                    if filter_id not in _HDF_FILTERS:
-                        self.log.info(
-                            "unknown filter id: " + str(filter_id) + " ignoring"
-                        )
-                        continue
-
-                    hdf_filter = _HDF_FILTERS[filter_id]
-
-                    self.log.info("got filter: " + str(filter_id))
-                    if "alias" not in hdf_filter:
-                        self.log.info(
-                            "unsupported filter id: " + str(filter_id) + " ignoring"
-                        )
-                        continue
-
-                    filter_alias = hdf_filter["alias"]
-                    if not h5py.h5z.filter_avail(filter_id):
-                        self.log.info(
-                            "compression filter not available, filter: "
-                            + filter_alias
-                            + " will be ignored"
-                        )
-                        continue
-                    if filter_alias in _H5PY_COMPRESSION_FILTERS:
-                        if kwargs.get("compression"):
-                            self.log.info(
-                                "compression filter already set, filter: "
-                                + filter_alias
-                                + " will be ignored"
-                            )
-                            continue
-
-                        kwargs["compression"] = filter_alias
-                        self.log.info(
-                            "setting compression filter to: " + kwargs["compression"]
-                        )
-                        if filter_alias == "gzip":
-                            # check for an optional compression value
-                            if "level" in filter_prop:
-                                kwargs["compression_opts"] = filter_prop["level"]
-                        elif filter_alias == "szip":
-                            bitsPerPixel = None
-                            coding = "nn"
-
-                            if "bitsPerPixel" in filter_prop:
-                                bitsPerPixel = filter_prop["bitsPerPixel"]
-                            if "coding" in filter_prop:
-                                if filter_prop["coding"] == "H5_SZIP_EC_OPTION_MASK":
-                                    coding = "ec"
-                                elif filter_prop["coding"] == "H5_SZIP_NN_OPTION_MASK":
-                                    coding = "nn"
-                                else:
-                                    msg = "invalid szip option: 'coding'"
-                                    self.log.info(msg)
-                                    raise IOError(errno.EINVAL, msg)
-                            # note: pixelsPerBlock, and pixelsPerScanline not supported by h5py,
-                            # so these options will be ignored
-                            if "pixelsPerBlock" in filter_props:
-                                self.log.info("ignoring szip option: 'pixelsPerBlock'")
-                            if "pixelsPerScanline" in filter_props:
-                                self.log.info(
-                                    "ignoring szip option: 'pixelsPerScanline'"
-                                )
-                            if bitsPerPixel:
-                                kwargs["compression_opts"] = (coding, bitsPerPixel)
-                    else:
-                        if filter_alias == "shuffle":
-                            kwargs["shuffle"] = True
-                        elif filter_alias == "fletcher32":
-                            kwargs["fletcher32"] = True
-                        elif filter_alias == "scaleoffset":
-                            if "scaleOffset" not in filter_prop:
-                                msg = "No scale_offset provided for scale offset filter"
-                                self.log(msg)
-                                raise IOError(errno.EINVAL, msg)
-                            kwargs["scaleoffset"] = filter_prop["scaleOffset"]
-                        else:
-                            self.log.info(
-                                "Unexpected filter name: "
-                                + filter_alias
-                                + " , ignoring"
-                            )
-
-        dt_ref = self.createTypeFromItem(datatype)
-        if dt_ref is None:
-            msg = "Unexpected error, no type returned"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        dt = dt_ref
-        if hasattr(dt_ref, "dtype"):
-            # dt_ref is actualy a handle to a committed type
-            # get the dtype prop, but use dt_ref for the actual dataset creation
-            dt = dt_ref.dtype
-
-        if fillvalue and len(dt) > 1 and type(fillvalue) in (list, tuple):
-            # for compound types, need to convert from list to dataset compatible element
-
-            if len(dt) != len(fillvalue):
-                msg = "fillvalue has incorrect number of elements"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            ndscalar = np.zeros((), dtype=dt)
-            for i in range(len(fillvalue)):
-                field = dt.names[i]
-                ndscalar[field] = self.toTuple(0, fillvalue[i])
-            fillvalue = ndscalar
-
-        if fillvalue:
-            kwargs["fillvalue"] = fillvalue
-
-        dataset_id = None
-        if datashape is None:
-            # create null space dataset
-            # null space datasets not supported in h5py yet:
-            # See: https://github.com/h5py/h5py/issues/279
-            # work around this by using low-level interface.
-            # first create a temp scalar dataset so we can pull out the typeid
-            tmpGrp = None
-            if "{tmp}" not in self.dbGrp:
-                tmpGrp = self.dbGrp.create_group("{tmp}")
-            else:
-                tmpGrp = self.dbGrp["{tmp}"]
-            tmpDataset = tmpGrp.create_dataset(obj_uuid, shape=(1,), dtype=dt_ref)
-            tid = tmpDataset.id.get_type()
-            sid = sid = h5py.h5s.create(h5py.h5s.NULL)
-            # now create the permanent dataset
-            gid = datasets.id
-            b_obj_uuid = obj_uuid.encode("utf-8")
-            dataset_id = h5py.h5d.create(gid, b_obj_uuid, tid, sid)
-            # delete the temp dataset
-            del tmpGrp[obj_uuid]
-        else:
-            # create the dataset
-            try:
-                newDataset = datasets.create_dataset(
-                    obj_uuid,
-                    shape=datashape,
-                    maxshape=max_shape,
-                    dtype=dt_ref,
-                    **kwargs,
-                )
-            except ValueError as ve:
-                msg = "Unable to create dataset"
-                try:
-                    msg += ": " + ve.message
-                except AttributeError:
-                    pass  # no message
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)  # assume this is due to invalid params
-
-            if newDataset:
-                dataset_id = newDataset.id
-
-        if dataset_id is None:
-            msg = "Unexpected failure to create dataset"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        # store reverse map as an attribute
-        addr = h5py.h5o.get_info(dataset_id).addr
-        addrGrp = self.dbGrp["{addr}"]
-        addrGrp.attrs[str(addr)] = obj_uuid
-
-        # save creation props if any
-        if creation_props:
-            self.setDatasetCreationProps(obj_uuid, creation_props)
-
-        # set timestamp
-        now = time.time()
-        self.setCreateTime(obj_uuid, timestamp=now)
-        self.setModifiedTime(obj_uuid, timestamp=now)
-
-        item["id"] = obj_uuid
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-        item["attributeCount"] = 0
-        return item
-
-    """
-    Resize existing Dataset
-    """
-
-    def resizeDataset(self, obj_uuid, shape):
-        self.log.info("resizeDataset(")  # + obj_uuid + "): ") # + str(shape))
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to resize dataset (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EACESS, msg)
-        dset = self.getDatasetObjByUuid(obj_uuid)  # will throw exception if not found
-        if len(shape) != len(dset.shape):
-            msg = "Unable to resize dataset, shape has wrong number of dimensions"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-        for i in range(len(shape)):
-            if shape[i] < dset.shape[i]:
-                msg = "Unable to resize dataset, cannot make extent smaller"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-            if dset.maxshape[i] is not None and shape[i] > dset.maxshape[i]:
-                msg = "Unable to resize dataset, max extent exceeded"
-                self.log.info(msg)
-                raise IOError(errno.EINVAL, msg)
-
-        dset.resize(shape)  # resize
-
-        # update modified time
-        self.setModifiedTime(obj_uuid)
-
-    """
-    Check if link points to given target (as a HardLink)
-    """
-
-    def isObjectHardLinked(self, parentGroup, targetGroup, linkName):
-        try:
-            linkObj = parentGroup.get(linkName, None, False, True)
-            linkClass = linkObj.__class__.__name__
-        except TypeError:
-            # UDLink? Ignore for now
-            return False
-        if linkClass == "SoftLink":
-            return False
-        elif linkClass == "ExternalLink":
-            return False
-        elif linkClass == "HardLink":
-            if parentGroup[linkName] == targetGroup:
-                return True
-        else:
-            self.log.warning("unexpected linkclass: " + linkClass)
-            return False
-
-    """
-    Delete Dataset, Group or Datatype by UUID
-    """
-
-    def deleteObjectByUuid(self, objtype, obj_uuid):
-        if objtype not in ("group", "dataset", "datatype"):
-            msg = "unexpected objtype: " + objtype
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        self.initFile()
-        self.log.info("delete uuid: " + obj_uuid)
-        if self.readonly:
-            msg = "Unable to delete object (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-
-        if obj_uuid == self.dbGrp.attrs["rootUUID"] and objtype == "group":
-            # can't delete root group
-            msg = "Unable to delete group (root group may not be deleted)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-
-        dbCol = None
-        tgt = None
-        if objtype == "dataset":
-            tgt = self.getDatasetObjByUuid(obj_uuid)
-            dbCol = self.dbGrp["{datasets}"]
-        elif objtype == "group":
-            tgt = self.getGroupObjByUuid(obj_uuid)
-            dbCol = self.dbGrp["{groups}"]
-        else:  # datatype
-            tgt = self.getCommittedTypeObjByUuid(obj_uuid)
-            dbCol = self.dbGrp["{datatypes}"]
-
-        if tgt is None:
-            msg = "Unable to delete " + objtype + ", uuid: " + obj_uuid + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        # unlink from root (if present)
-        self.unlinkObject(self.f["/"], tgt)
-
-        groups = self.dbGrp["{groups}"]
-        # iterate through each group in the file and unlink tgt if it is linked
-        # by the group.
-        # We'll store a list of links to be removed as we go, and then actually
-        # remove the links after the iteration is done (otherwise we can run into issues
-        # where the key has become invalid)
-        linkList = []  # this is our list
-        for uuidName in groups.attrs:
-            grpRef = groups.attrs[uuidName]
-            # de-reference handle
-            grp = self.f[grpRef]
-            for linkName in grp:
-                if self.isObjectHardLinked(grp, tgt, linkName):
-                    linkList.append({"group": grp, "link": linkName})
-        for item in linkList:
-            self.unlinkObjectItem(item["group"], tgt, item["link"])
-
-        addr = h5py.h5o.get_info(tgt.id).addr
-        addrGrp = self.dbGrp["{addr}"]
-        del addrGrp.attrs[str(addr)]  # remove reverse map
-        dbRemoved = False
-
-        # finally, remove the dataset from db
-        if obj_uuid in dbCol:
-            # should be here (now it is anonymous)
-            del dbCol[obj_uuid]
-            dbRemoved = True
-
-        if not dbRemoved:
-            self.log.warning("did not find: " + obj_uuid + " in anonymous collection")
-
-            if obj_uuid in dbCol.attrs:
-                self.log.info(
-                    "removing: " + obj_uuid + " from non-anonymous collection"
-                )
-                del dbCol.attrs[obj_uuid]
-                dbRemoved = True
-
-        if not dbRemoved:
-            msg = "Unexpected Error, did not find reference to: " + obj_uuid
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-
-        # note when the object was deleted
-        self.setModifiedTime(obj_uuid)
-
-        return True
-
-    def getGroupItemByUuid(self, obj_uuid):
-        self.initFile()
-        grp = self.getGroupObjByUuid(obj_uuid)
-        if grp is None:
-            if self.getModifiedTime(obj_uuid, useRoot=False):
-                msg = "Group with uuid: " + obj_uuid + " has been previously deleted"
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
-            else:
-                msg = "Group with uuid: " + obj_uuid + " was not found"
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-
-        linkCount = len(grp)
-        if "__db__" in grp:
-            linkCount -= 1  # don't include the db group
-
-        item = {"id": obj_uuid}
-        alias = []
-        if grp.name and not grp.name.startswith("/__db__"):
-            alias.append(grp.name)  # just use the default h5py path for now
-        item["alias"] = alias
-        item["attributeCount"] = len(grp.attrs)
-        item["linkCount"] = linkCount
-        if self.update_timestamps:
-            item["ctime"] = self.getCreateTime(obj_uuid)
-            item["mtime"] = self.getModifiedTime(obj_uuid)
-
-        return item
-
-    """
-    getLinkItemByObj - return info about a link
-        parent: reference to group
-        linkName: name of link
-        return: item dictionary with link attributes, or None if not found
-    """
-
-    def getLinkItemByObj(self, parent, link_name):
-        if link_name not in parent:
-            return None
-
-        if link_name == "__db__":
-            return None  # don't provide link to db group
-        #  "http://somefile/#h5path(somepath)")
-        item = {"title": link_name}
-        # get the link object, one of HardLink, SoftLink, or ExternalLink
-        try:
-            linkObj = parent.get(link_name, None, False, True)
-            linkClass = linkObj.__class__.__name__
-        except TypeError:
-            # UDLink? set class as 'user'
-            linkClass = "UDLink"  # user defined links
-            item["class"] = "H5L_TYPE_USER_DEFINED"
-        if linkClass == "SoftLink":
-            item["class"] = "H5L_TYPE_SOFT"
-            item["h5path"] = linkObj.path
-            item["href"] = "#h5path(" + linkObj.path + ")"
-        elif linkClass == "ExternalLink":
-            item["class"] = "H5L_TYPE_EXTERNAL"
-            item["h5path"] = linkObj.path
-            item["file"] = linkObj.filename
-            item["href"] = "#h5path(" + linkObj.path + ")"
-        elif linkClass == "HardLink":
-            # Hardlink doesn't have any properties itself, just get the linked
-            # object
-            obj = parent[link_name]
-            addr = h5py.h5o.get_info(obj.id).addr
-            item["class"] = "H5L_TYPE_HARD"
-            item["id"] = self.getUUIDByAddress(addr)
-            class_name = obj.__class__.__name__
-            if class_name == "Dataset":
-                item["href"] = "datasets/" + item["id"]
-                item["collection"] = "datasets"
-            elif class_name == "Group":
-                item["href"] = "groups/" + item["id"]
-                item["collection"] = "groups"
-            elif class_name == "Datatype":
-                item["href"] = "datatypes/" + item["id"]
-                item["collection"] = "datatypes"
-            else:
-                self.log.warning("unexpected object type: " + item["type"])
-
-        return item
-
-    def getLinkItemByUuid(self, grpUuid, link_name):
-        self.log.info("db.getLinkItemByUuid(" + grpUuid + ", [" + link_name + "])")
-        if not link_name:
-            msg = "link_name not specified"
-            self.log.info(msg)
-            raise IOError(errno.EINVAL, msg)
-
-        self.initFile()
-        parent = self.getGroupObjByUuid(grpUuid)
-        if parent is None:
-            msg = "Parent group: " + grpUuid + " of link not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        item = self.getLinkItemByObj(parent, link_name)
-        # add timestamps
-        if item:
-            if self.update_timestamps:
-                item["ctime"] = self.getCreateTime(
-                    grpUuid, objType="link", name=link_name
-                )
-                item["mtime"] = self.getModifiedTime(
-                    grpUuid, objType="link", name=link_name
-                )
-        else:
-            self.log.info("link not found")
-            mtime = self.getModifiedTime(
-                grpUuid, objType="link", name=link_name, useRoot=False
-            )
-            if mtime:
-                msg = (
-                    "Link ["
-                    + link_name
-                    + "] of: "
-                    + grpUuid
-                    + " has been previously deleted"
-                )
-                self.log.info(msg)
-                raise IOError(errno.ENOENT, msg)
-            else:
-                msg = "Link [" + link_name + "] of: " + grpUuid + " not found"
-                self.log.info(msg)
-                raise IOError(errno.ENXIO, msg)
-
-        return item
-
-    def getLinkItems(self, grpUuid, marker=None, limit=0):
-        self.log.info("db.getLinkItems(" + grpUuid + ")")
-        if marker:
-            self.log.info("...marker: " + marker)
-        if limit:
-            self.log.info("...limit: " + str(limit))
-
-        self.initFile()
-        parent = self.getGroupObjByUuid(grpUuid)
-        if parent is None:
-            msg = "Parent group: " + grpUuid + " not found, no links returned"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        items = []
-        gotMarker = True
-        if marker is not None:
-            gotMarker = False
-        count = 0
-        for link_name in parent:
-            if link_name == "__db__":
+        """
+        createDataset - creates new dataset given shape and datatype
+        Returns obj_id
+        """
+        if self.closed:
+            raise ValueError("db is closed")
+        type_json = getTypeItem(dtype)
+        if shape == "H5S_NULL":
+            shape_json = {"class": "H5S_NULL"}
+        elif shape == ():
+            shape_json = {"class": "H5S_SCALAR"}
+        else:
+            shape_json = {"class": "H5S_SIMPLE"}
+            shape_json["dims"] = list(shape)
+
+        if maxdims:
+            if shape_json["class"] != "H5S_SIMPLE":
+                raise ValueError("only simple shapes can be resizable")
+            if len(shape) != len(maxdims):
+                raise ValueError("maxdims length not equal to shape rank")
+            shape_json["maxdims"] = ["H5S_UNLIMITED" if x is None else x for x in maxdims]
+
+        dset_json = {"shape": shape_json, "type": type_json, "attributes": {}}
+        if cpl:
+            dset_json["cpl"] = cpl
+        else:
+            dset_json["cpl"] = {}
+
+        dset_id = createObjId("datasets", root_id=self.root_id)
+        self.db[dset_id] = dset_json
+        self._new_objects.add(dset_id)
+        return dset_id
+
+    def getCollection(self, col_type=None):
+        obj_ids = []
+        for obj_id in self.db:
+            if self.db[obj_id] is None:
+                # skip deleted objects
                 continue
-            if not gotMarker:
-                if link_name == marker:
-                    gotMarker = True
-                    continue  # start filling in result on next pass
-                else:
-                    continue  # keep going!
-            item = self.getLinkItemByObj(parent, link_name)
-            items.append(item)
-
-            count += 1
-            if limit > 0 and count == limit:
-                break  # return what we got
-        return items
-
-    def unlinkItem(self, grpUuid, link_name):
-        if self.readonly:
-            msg = "Unable to unlink item (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        grp = self.getGroupObjByUuid(grpUuid)
-        if grp is None:
-            msg = "Parent group: " + grpUuid + " not found, cannot remove link"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        if link_name not in grp:
-            msg = (
-                "Link: ["
-                + link_name
-                + "] of group: "
-                + grpUuid
-                + " not found, cannot remove link"
-            )
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        if link_name == "__db__":
-            # don't allow db group to be unlinked!
-            msg = "Unlinking of __db__ group not allowed"
-            raise IOError(errno.EPERM, msg)
-
-        obj = None
-        try:
-            linkObj = grp.get(link_name, None, False, True)
-            linkClass = linkObj.__class__.__name__
-            if linkClass == "HardLink":
-                # we can safely reference the object
-                obj = grp[link_name]
-        except TypeError:
-            # UDLink? Return false to indicate that we can not delete this
-            msg = "Unable to unlink user defined link"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
+            if not col_type or getCollectionForId(obj_id) == col_type:
+                obj_ids.append(obj_id)
+        return obj_ids
 
-        linkDeleted = False
-        if obj is not None:
-            linkDeleted = self.unlinkObjectItem(grp, obj, link_name)
-        else:
-            # SoftLink or External Link - we can just remove the key
-            del grp[link_name]
-            linkDeleted = True
-
-        if linkDeleted:
-            # update timestamp
-            self.setModifiedTime(grpUuid, objType="link", name=link_name)
-
-        return linkDeleted
-
-    def getCollection(self, col_type, marker=None, limit=None):
-        self.log.info("db.getCollection(" + col_type + ")")
-        # col_type should be either "datasets", "groups", or "datatypes"
-        if col_type not in ("datasets", "groups", "datatypes"):
-            msg = "Unexpected col_type: [" + col_type + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        self.initFile()
-        col = None  # Group, Dataset, or Datatype
-        if col_type == "datasets":
-            col = self.dbGrp["{datasets}"]
-        elif col_type == "groups":
-            col = self.dbGrp["{groups}"]
-        else:  # col_type == "datatypes"
-            col = self.dbGrp["{datatypes}"]
-
-        uuids = []
+    def __len__(self):
+        # return the number of objects
         count = 0
-        # gather the non-anonymous ids first
-        for obj_uuid in col.attrs:
-            if marker:
-                if obj_uuid == marker:
-                    marker = None  # clear and pick up next item
-                continue
-            uuids.append(obj_uuid)
-            count += 1
-            if limit is not None and limit > 0 and count == limit:
-                break
-
-        if limit == 0 or (limit is not None and count < limit):
-            # grab any anonymous obj ids next
-            for obj_uuid in col:
-                if marker:
-                    if obj_uuid == marker:
-                        marker = None  # clear and pick up next item
-                    continue
-                uuids.append(obj_uuid)
+        for obj_id in self.db:
+            # skip deleted objects
+            if self.db[obj_id] is not None:
                 count += 1
-                if limit is not None and limit > 0 and count == limit:
-                    break
-
-        return uuids
-
-    """
-      Get the DB Collection names
-    """
-
-    def getDBCollections(self):
-        return ("{groups}", "{datasets}", "{datatypes}")
-
-    """
-        Return the db collection the uuid belongs to
-    """
-
-    def getDBCollection(self, obj_uuid):
-        dbCollections = self.getDBCollections()
-        for dbCollectionName in dbCollections:
-            col = self.dbGrp[dbCollectionName]
-            if obj_uuid in col or obj_uuid in col.attrs:
-                return col
-        return None
-
-    def unlinkObjectItem(self, parentGrp, tgtObj, link_name):
-        if self.readonly:
-            msg = "Unexpected attempt to unlink object"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        if link_name not in parentGrp:
-            msg = "Unexpected: did not find link_name: [" + link_name + "]"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        try:
-            linkObj = parentGrp.get(link_name, None, False, True)
-        except TypeError:
-            # user defined link?
-            msg = "Unable to remove link (user-defined link?)"
-            self.log.error(msg)
-            raise IOError(errno.EIO, msg)
-        linkClass = linkObj.__class__.__name__
-        # only deal with HardLinks
-        linkDeleted = False
-        if linkClass == "HardLink":
-            obj = parentGrp[link_name]
-            if tgtObj is None or obj == tgtObj:
-                numlinks = self.getNumLinksToObject(obj)
-                if numlinks == 1:
-                    # last link to this object - convert to anonymous object by
-                    # creating link under {datasets} or {groups} or {datatypes}
-                    # also remove the attribute UUID key
-                    addr = h5py.h5o.get_info(obj.id).addr
-                    obj_uuid = self.getUUIDByAddress(addr)
-                    self.log.info("converting: " + obj_uuid + " to anonymous obj")
-                    dbCol = self.getDBCollection(obj_uuid)
-                    del dbCol.attrs[obj_uuid]  # remove the object ref
-                    dbCol[obj_uuid] = obj  # add a hardlink
-                self.log.info(
-                    "deleting link: [" + link_name + "] from: " + parentGrp.name
-                )
-                del parentGrp[link_name]
-                linkDeleted = True
-        else:
-            self.log.info("unlinkObjectItem: link is not a hardlink, ignoring")
-        return linkDeleted
-
-    def unlinkObject(self, parentGrp, tgtObj):
-        for name in parentGrp:
-            self.unlinkObjectItem(parentGrp, tgtObj, name)
-        return True
-
-    def linkObject(self, parentUUID, childUUID, link_name):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create link (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-
-        parentObj = self.getGroupObjByUuid(parentUUID)
-        if parentObj is None:
-            msg = "Unable to create link, parent UUID: " + parentUUID + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-
-        childObj = self.getDatasetObjByUuid(childUUID)
-        if childObj is None:
-            # maybe it's a group...
-            childObj = self.getGroupObjByUuid(childUUID)
-        if childObj is None:
-            # or maybe it's a committed datatype...
-            childObj = self.getCommittedTypeObjByUuid(childUUID)
-        if childObj is None:
-            msg = "Unable to link item, child UUID: " + childUUID + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        if link_name in parentObj:
-            # link already exists
-            self.log.info("linkname already exists, deleting")
-            self.unlinkObjectItem(parentObj, None, link_name)
-        parentObj[link_name] = childObj
-
-        # convert this from an anonymous object to ref if needed
-        dbCol = self.getDBCollection(childUUID)
-        if childUUID in dbCol:
-            # convert to a ref
-            del dbCol[childUUID]  # remove hardlink
-            dbCol.attrs[childUUID] = childObj.ref  # create a ref
-
-        # set link timestamps
-        now = time.time()
-        self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now)
-        self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now)
-        return True
-
-    def createSoftLink(self, parentUUID, linkPath, link_name):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create link (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        parentObj = self.getGroupObjByUuid(parentUUID)
-        if parentObj is None:
-            msg = "Unable to create link, parent UUID: " + parentUUID + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        if link_name in parentObj:
-            # link already exists
-            self.log.info("linkname already exists, deleting")
-            del parentObj[link_name]  # delete old link
-        parentObj[link_name] = h5py.SoftLink(linkPath)
-
-        now = time.time()
-        self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now)
-        self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now)
-
-        return True
-
-    def createExternalLink(self, parentUUID, extPath, linkPath, link_name):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create link (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        parentObj = self.getGroupObjByUuid(parentUUID)
-        if parentObj is None:
-            msg = "Unable to create link, parent UUID: " + parentUUID + " not found"
-            self.log.info(msg)
-            raise IOError(errno.ENXIO, msg)
-        if link_name in parentObj:
-            # link already exists
-            self.log.info("linkname already exists, deleting")
-            del parentObj[link_name]  # delete old link
-        parentObj[link_name] = h5py.ExternalLink(extPath, linkPath)
-
-        now = time.time()
-        self.setCreateTime(parentUUID, objType="link", name=link_name, timestamp=now)
-        self.setModifiedTime(parentUUID, objType="link", name=link_name, timestamp=now)
-
-        return True
-
-    def createGroup(self, obj_uuid=None):
-        self.initFile()
-        if self.readonly:
-            msg = "Unable to create group (Updates are not allowed)"
-            self.log.info(msg)
-            raise IOError(errno.EPERM, msg)
-        groups = self.dbGrp["{groups}"]
-        if not obj_uuid:
-            obj_uuid = str(uuid.uuid1())
-        newGroup = groups.create_group(obj_uuid)
-        # store reverse map as an attribute
-        addr = h5py.h5o.get_info(newGroup.id).addr
-        addrGrp = self.dbGrp["{addr}"]
-        addrGrp.attrs[str(addr)] = obj_uuid
-
-        # set timestamps
-        now = time.time()
-        self.setCreateTime(obj_uuid, timestamp=now)
-        self.setModifiedTime(obj_uuid, timestamp=now)
-
-        return obj_uuid
-
-    def getNumberOfGroups(self):
-        self.initFile()
-        count = 0
-        groups = self.dbGrp["{groups}"]
-        count += len(groups)  # anonymous groups
-        count += len(groups.attrs)  # linked groups
-        count += 1  # add of for root group
-
         return count
 
-    def getNumberOfDatasets(self):
-        self.initFile()
-        count = 0
-        datasets = self.dbGrp["{datasets}"]
-        count += len(datasets)  # anonymous datasets
-        count += len(datasets.attrs)  # linked datasets
-        return count
+    def __iter__(self):
+        """ Iterate over object ids """
 
-    def getNumberOfDatatypes(self):
-        self.initFile()
-        count = 0
-        datatypes = self.dbGrp["{datatypes}"]
-        count += len(datatypes)  # anonymous datatypes
-        count += len(datatypes.attrs)  # linked datatypes
-        return count
+        for obj_id in self.db:
+            if self.db[obj_id] is None:
+                # skip deleted objects
+                continue
+            yield obj_id
+
+    def __contains__(self, obj_id):
+        """ Test if a obj id  exists """
+        return obj_id in self.db and self.db[obj_id] is not None
diff --git a/src/h5json/hdf5dtype.py b/src/h5json/hdf5dtype.py
old mode 100755
new mode 100644
index 9f867f2..bbef116
--- a/src/h5json/hdf5dtype.py
+++ b/src/h5json/hdf5dtype.py
@@ -2,37 +2,257 @@
 # Copyright by The HDF Group.                                                #
 # All rights reserved.                                                       #
 #                                                                            #
-# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
 # terms governing use, modification, and redistribution, is contained in     #
 # the file COPYING, which can be found at the root of the source code        #
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 
-"""
-This class is used to map between HDF5 type representations and numpy types
-
-"""
+import weakref
 import numpy as np
-from h5py.h5t import special_dtype
-from h5py.h5t import check_dtype
-from h5py.h5r import Reference
-from h5py.h5r import RegionReference
+
+
+numpy_integer_types = (np.int8, np.uint8, np.int16, np.int16, np.int32, np.uint32, np.int64, np.uint64)
+numpy_float_types = (np.float16, np.float32, np.float64)
+
+
+class Reference:
+    """
+    Represents an HDF5 object reference
+    """
+
+    @property
+    def id(self):
+        """Low-level identifier appropriate for this object"""
+        return self._id
+
+    @property
+    def objref(self):
+        """Weak reference to object"""
+        return self._objref  # return weak ref to ref'd object
+
+    def __init__(self, bind):
+        """Create a new reference by binding to
+        a group/dataset/committed type
+        """
+        self._id = bind._id
+        self._objref = weakref.ref(bind)
+
+    def __repr__(self):
+        # TBD: this is not consistent with hsds or h5py...
+        if not isinstance(self._id.id, str):
+            raise TypeError("Expected string id")
+        item = None
+
+        collection_type = self._id.collection_type
+        item = f"{collection_type}/{self._id.id}"
+        return item
+
+    def tolist(self):
+        if type(self._id.id) is not str:
+            raise TypeError("Expected string id")
+        if self._id.objtype_code == "d":
+            return [
+                ("datasets/" + self._id.id),
+            ]
+        elif self._id.objtype_code == "g":
+            return [
+                ("groups/" + self._id.id),
+            ]
+        elif self._id.objtype_code == "t":
+            return [
+                ("datatypes/" + self._id.id),
+            ]
+        else:
+            raise TypeError("Unexpected id type")
+
+
+class RegionReference:
+    """
+    Represents an HDF5 region reference
+    """
+
+    @property
+    def id(self):
+        """Low-level identifier appropriate for this object"""
+        return self._id
+
+    @property
+    def objref(self):
+        """Weak reference to object"""
+        return self._objref  # return weak ref to ref'd object
+
+    def __init__(self, bind):
+        """Create a new reference by binding to
+        a group/dataset/committed type
+        """
+        self._id = bind._id
+        self._objref = weakref.ref(bind)
+
+    def __repr__(self):
+        return "<HDF5 region reference>"
+
+
+def special_dtype(**kwds):
+    """Create a new h5py "special" type.  Only one keyword may be given.
+
+    Legal keywords are:
+
+    vlen = basetype
+        Base type for HDF5 variable-length datatype. This can be Python
+        str type or instance of np.dtype.
+        Example: special_dtype( vlen=str )
+
+    enum = (basetype, values_dict)
+        Create a NumPy representation of an HDF5 enumerated type.  Provide
+        a 2-tuple containing an (integer) base dtype and a dict mapping
+        string names to integer values.
+
+    ref = Reference | RegionReference
+        Create a NumPy representation of an HDF5 object or region reference
+        type."""
+
+    if len(kwds) != 1:
+        raise TypeError("Exactly one keyword may be provided")
+
+    name, val = kwds.popitem()
+
+    if name == "vlen":
+
+        return np.dtype("O", metadata={"vlen": val})
+
+    if name == "enum":
+
+        try:
+            dt, enum_vals = val
+        except TypeError:
+            msg = "Enums must be created from a 2-tuple "
+            msg += "(basetype, values_dict)"
+            raise TypeError(msg)
+
+        dt = np.dtype(dt)
+        if dt.kind not in "iu":
+            raise TypeError("Only integer types can be used as enums")
+
+        return np.dtype(dt, metadata={"enum": enum_vals})
+
+    if name == "ref":
+        dt = None
+        if val is Reference:
+            dt = np.dtype("S48", metadata={"ref": Reference})
+        elif val is RegionReference:
+            dt = np.dtype("S48", metadata={"ref": RegionReference})
+        else:
+            raise ValueError("Ref class must be Reference or RegionReference")
+
+        return dt
+
+    raise TypeError(f'Unknown special type "{name}"')
+
+
+def find_item_type(data):
+    """Find the item type of a simple object or collection of objects.
+
+    E.g. [[['a']]] -> str
+
+    The focus is on collections where all items have the same type; we'll return
+    None if that's not the case.
+
+    The aim is to treat numpy arrays of Python objects like normal Python
+    collections, while treating arrays with specific dtypes differently.
+    We're also only interested in array-like collections - lists and tuples,
+    possibly nested - not things like sets or dicts.
+    """
+    if isinstance(data, np.ndarray):
+        if (
+            data.dtype.kind == 'O' and not check_dtype(vlen=data.dtype)
+        ):
+            item_types = {type(e) for e in data.flat}
+        else:
+            return None
+    elif isinstance(data, (list, tuple)):
+        item_types = {find_item_type(e) for e in data}
+    else:
+        return type(data)
+
+    if len(item_types) != 1:
+        return None
+    return item_types.pop()
+
+
+def guess_dtype(data):
+    """ Attempt to guess an appropriate dtype for the object, returning None
+    if nothing is appropriate (or if it should be left up the the array
+    constructor to figure out)
+    """
+
+    # todo - handle RegionReference, Reference
+    item_type = find_item_type(data)
+    if item_type is bytes:
+        return special_dtype(vlen=bytes)
+    if item_type is str:
+        return special_dtype(vlen=str)
+
+    return None
+
+
+def is_float16_dtype(dt):
+    if dt is None:
+        return False
+
+    dt = np.dtype(dt)  # normalize strings -> np.dtype objects
+    return dt.kind == 'f' and dt.itemsize == 2
+
+
+def check_dtype(**kwds):
+    """Check a dtype for h5py special type "hint" information.  Only one
+    keyword may be given.
+
+    vlen = dtype
+        If the dtype represents an HDF5 vlen, returns the Python base class.
+        Currently only builting string vlens (str) are supported.  Returns
+        None if the dtype does not represent an HDF5 vlen.
+
+    enum = dtype
+        If the dtype represents an HDF5 enumerated type, returns the dictionary
+        mapping string names to integer values.  Returns None if the dtype does
+        not represent an HDF5 enumerated type.
+
+    ref = dtype
+        If the dtype represents an HDF5 reference type, returns the reference
+        class (either Reference or RegionReference).  Returns None if the dtype
+        does not represent an HDF5 reference type.
+    """
+
+    if len(kwds) != 1:
+        raise TypeError("Exactly one keyword may be provided")
+
+    name, dt = kwds.popitem()
+
+    if name not in ("vlen", "enum", "ref"):
+        raise TypeError('Unknown special type "%s"' % name)
+
+    try:
+        return dt.metadata[name]
+    except TypeError:
+        return None
+    except KeyError:
+        return None
 
 
 def getTypeResponse(typeItem):
     """
     Convert the given type item  to a predefined type string for
-    predefined integer and floating point types ("H5T_STD_I64LE", et. al).
-    For compound types, recursively iterate through the typeItem and do same
-    conversion for fields of the compound type.
-    """
+        predefined integer and floating point types ("H5T_STD_I64LE", et. al).
+        For compound types, recursively iterate through the typeItem and do
+        same conversion for fields of the compound type."""
     response = None
     if "uuid" in typeItem:
         # committed type, just return uuid
         response = "datatypes/" + typeItem["uuid"]
-    elif typeItem["class"] == "H5T_INTEGER" or typeItem["class"] == "H5T_FLOAT":
+    elif typeItem["class"] in ("H5T_INTEGER", "H5T_FLOAT"):
         # just return the class and base for pre-defined types
         response = {}
         response["class"] = typeItem["class"]
@@ -52,7 +272,7 @@ def getTypeResponse(typeItem):
         for field in typeItem["fields"]:
             fieldItem = {}
             fieldItem["name"] = field["name"]
-            fieldItem["type"] = getTypeResponse(field["type"])  # recursive call
+            fieldItem["type"] = getTypeResponse(field["type"])  # recurse call
             fieldList.append(fieldItem)
         response["fields"] = fieldList
     else:
@@ -68,112 +288,12 @@ def getTypeResponse(typeItem):
     return response
 
 
-def getItemSize(typeItem):
-    """
-    Get size of an item in bytes.
-    For variable length types (e.g. variable length strings),
-    return the string "H5T_VARIABLE"
+def getTypeItem(dt, metadata=None):
     """
-    # handle the case where we are passed a primitive type first
-    if isinstance(typeItem, bytes):
-        typeItem = typeItem.decode("ascii")
-    if isinstance(typeItem, str):
-        for type_prefix in ("H5T_STD_I", "H5T_STD_U", "H5T_IEEE_F"):
-            if typeItem.startswith(type_prefix):
-                num_bits = typeItem[len(type_prefix) :]
-                if num_bits[-2:] in ("LE", "BE"):
-                    num_bits = num_bits[:-2]
-                try:
-                    return int(num_bits) // 8
-                except ValueError:
-                    raise TypeError("Invalid Type")
-        # none of the expect primative types mathched
-        raise TypeError("Invalid Type")
-    if not isinstance(typeItem, dict):
-        raise TypeError("invalid type")
-
-    item_size = 0
-    if "class" not in typeItem:
-        raise KeyError("'class' not provided")
-    typeClass = typeItem["class"]
-
-    if typeClass == "H5T_INTEGER":
-        if "base" not in typeItem:
-            raise KeyError("'base' not provided")
-        item_size = getItemSize(typeItem["base"])
-
-    elif typeClass == "H5T_FLOAT":
-        if "base" not in typeItem:
-            raise KeyError("'base' not provided")
-        item_size = getItemSize(typeItem["base"])
-
-    elif typeClass == "H5T_STRING":
-        if "length" not in typeItem:
-            raise KeyError("'length' not provided")
-        item_size = typeItem["length"]
-
-    elif typeClass == "H5T_VLEN":
-        item_size = "H5T_VARIABLE"
-    elif typeClass == "H5T_OPAQUE":
-        if "size" not in typeItem:
-            raise KeyError("'size' not provided")
-        item_size = int(typeItem["size"])
-
-    elif typeClass == "H5T_ARRAY":
-        if "dims" not in typeItem:
-            raise KeyError("'dims' must be provided for array types")
-        if "base" not in typeItem:
-            raise KeyError("'base' not provided")
-        item_size = getItemSize(typeItem["base"])
-
-    elif typeClass == "H5T_ENUM":
-        if "base" not in typeItem:
-            raise KeyError("'base' must be provided for enum types")
-        item_size = getItemSize(typeItem["base"])
-
-    elif typeClass == "H5T_REFERENCE":
-        item_size = "H5T_VARIABLE"
-    elif typeClass == "H5T_COMPOUND":
-        if "fields" not in typeItem:
-            raise KeyError("'fields' not provided for compound type")
-        fields = typeItem["fields"]
-        if type(fields) is not list:
-            raise TypeError("Type Error: expected list type for 'fields'")
-        if not fields:
-            raise KeyError("no 'field' elements provided")
-        # add up the size of each sub-field
-        for field in fields:
-            if not isinstance(field, dict):
-                raise TypeError("Expected dictionary type for field")
-            if "type" not in field:
-                raise KeyError("'type' missing from field")
-            subtype_size = getItemSize(field["type"])  # recursive call
-            if subtype_size == "H5T_VARIABLE":
-                item_size = "H5T_VARIABLE"
-                break  # don't need to look at the rest
-
-            item_size += subtype_size
-    else:
-        raise TypeError("Invalid type class")
-
-    # calculate array type
-    if "dims" in typeItem and type(item_size) is int:
-        dims = typeItem["dims"]
-        for dim in dims:
-            item_size *= dim
-
-    return item_size
-
-
-"""
     Return type info.
           For primitive types, return string with typename
           For compound types return array of dictionary items
-"""
-
-
-def getTypeItem(dt):
-
+    """
     predefined_int_types = {
         "int8": "H5T_STD_I8",
         "uint8": "H5T_STD_U8",
@@ -184,10 +304,19 @@ def getTypeItem(dt):
         "int64": "H5T_STD_I64",
         "uint64": "H5T_STD_U64",
     }
-    predefined_float_types = {"float32": "H5T_IEEE_F32", "float64": "H5T_IEEE_F64"}
+    predefined_float_types = {
+        "float16": "H5T_IEEE_F16",
+        "float32": "H5T_IEEE_F32",
+        "float64": "H5T_IEEE_F64",
+    }
+
+    dt = np.dtype(dt)  # convert 'int32', np.int32, etc. to a dtype
+
+    if not metadata and dt.metadata:
+        metadata = dt.metadata
 
     type_info = {}
-    if len(dt) > 1 or dt.names:
+    if len(dt):
         # compound type
         names = dt.names
         type_info["class"] = "H5T_COMPOUND"
@@ -204,15 +333,22 @@ def getTypeItem(dt):
         # array type
         type_info["dims"] = dt.shape
         type_info["class"] = "H5T_ARRAY"
-        type_info["base"] = getTypeItem(dt.base)
+        type_info["base"] = getTypeItem(dt.base, metadata=metadata)
     elif dt.kind == "O":
         # vlen string or data
         #
         # check for h5py variable length extension
-        vlen_check = check_dtype(vlen=dt.base)
-        if vlen_check is not None and not isinstance(vlen_check, np.dtype):
-            vlen_check = np.dtype(vlen_check)
-        ref_check = check_dtype(ref=dt.base)
+        vlen_check = None
+        if metadata and "vlen" in metadata:
+            vlen_check = metadata["vlen"]
+            if vlen_check is not None and not isinstance(vlen_check, np.dtype):
+                vlen_check = np.dtype(vlen_check)
+
+        if metadata and "ref" in metadata:
+            ref_check = metadata["ref"]
+        else:
+            ref_check = check_dtype(ref=dt.base)
+
         if vlen_check == bytes:
             type_info["class"] = "H5T_STRING"
             type_info["length"] = "H5T_VARIABLE"
@@ -229,15 +365,15 @@ def getTypeItem(dt):
             type_info["size"] = "H5T_VARIABLE"
             type_info["base"] = getTypeItem(vlen_check)
         elif vlen_check is not None:
-            # unknown vlen type
+            #  unknown vlen type
             raise TypeError("Unknown h5py vlen type: " + str(vlen_check))
         elif ref_check is not None:
             # a reference type
             type_info["class"] = "H5T_REFERENCE"
 
-            if ref_check is Reference:
+            if ref_check.__name__ == "Reference":
                 type_info["base"] = "H5T_STD_REF_OBJ"  # objref
-            elif ref_check is RegionReference:
+            elif ref_check.__name__ == "RegionReference":
                 type_info["base"] = "H5T_STD_REF_DSETREG"  # region ref
             else:
                 raise TypeError("unexpected reference type")
@@ -249,14 +385,40 @@ def getTypeItem(dt):
         type_info["size"] = dt.itemsize
         type_info["tag"] = ""  # todo - determine tag
     elif dt.base.kind == "S":
-        # Fixed length string type
-        type_info["class"] = "H5T_STRING"
-        type_info["charSet"] = "H5T_CSET_ASCII"
+        # check for object reference
+        ref_check = check_dtype(ref=dt.base)
+        if ref_check is not None:
+            # a reference type
+            type_info["class"] = "H5T_REFERENCE"
+
+            if ref_check is Reference:
+                type_info["base"] = "H5T_STD_REF_OBJ"  # objref
+            elif ref_check is RegionReference:
+                type_info["base"] = "H5T_STD_REF_DSETREG"  # region ref
+            else:
+                raise TypeError("unexpected reference type")
+        else:
+            # Fixed length string type
+            type_info["class"] = "H5T_STRING"
         type_info["length"] = dt.itemsize
+        type_info["charSet"] = "H5T_CSET_ASCII"
         type_info["strPad"] = "H5T_STR_NULLPAD"
     elif dt.base.kind == "U":
         # Fixed length unicode type
-        raise TypeError("Fixed length unicode type is not supported")
+        ref_check = check_dtype(ref=dt.base)
+        if ref_check is not None:
+            raise TypeError("unexpected reference type")
+
+        # Fixed length string type with unicode support
+        type_info["class"] = "H5T_STRING"
+
+        # this can be problematic if the encoding of the string is not valid,
+        # or reqires too many bytes.  Use variable length strings to handle all
+        # UTF8 strings correctly
+        type_info["charSet"] = "H5T_CSET_UTF8"
+        # convert from UTF32 length to a fixed length
+        type_info["length"] = dt.itemsize
+        type_info["strPad"] = "H5T_STR_NULLPAD"
 
     elif dt.kind == "b":
         # boolean type - h5py stores as enum
@@ -265,13 +427,14 @@ def getTypeItem(dt):
         if dt.base.byteorder == ">":
             byteorder = "BE"
         # this mapping is an h5py convention for boolean support
-        members = [{"name": "FALSE", "value": 0}, {"name": "TRUE", "value": 1}]
+        bool_false = {"name": "FALSE", "value": 0}
+        bool_true = {"name": "TRUE", "value": 1}
+        members = [bool_false, bool_true]
         type_info["class"] = "H5T_ENUM"
         type_info["members"] = members
         base_info = {"class": "H5T_INTEGER"}
         base_info["base"] = "H5T_STD_I8" + byteorder
         type_info["base"] = base_info
-
     elif dt.kind == "f":
         # floating point type
         type_info["class"] = "H5T_FLOAT"
@@ -280,7 +443,8 @@ def getTypeItem(dt):
             byteorder = "BE"
         if dt.name in predefined_float_types:
             # maps to one of the HDF5 predefined types
-            type_info["base"] = predefined_float_types[dt.base.name] + byteorder
+            float_type = predefined_float_types[dt.base.name]
+            type_info["base"] = float_type + byteorder
         else:
             raise TypeError("Unexpected floating point type: " + dt.name)
     elif dt.kind == "i" or dt.kind == "u":
@@ -291,14 +455,18 @@ def getTypeItem(dt):
         if dt.base.byteorder == ">":
             byteorder = "BE"
 
-        # numpy integer type - but check to see if this is the h5py
+        # numpy integer type - but check to see if this is the hypy
         # enum extension
-        mapping = check_dtype(enum=dt)
-
-        if mapping:
+        if metadata and "enum" in metadata:
             # yes, this is an enum!
+            mapping = metadata["enum"]
             type_info["class"] = "H5T_ENUM"
-            type_info["members"] = [{"name": n, "value": v} for n, v in mapping.items()]
+            members = []
+            for name in mapping:
+                value = mapping[name]
+                item = {"name": name, "value": value}
+                members.append(item)
+            type_info["members"] = members
             if dt.name not in predefined_int_types:
                 raise TypeError("Unexpected integer type: " + dt.name)
             # maps to one of the HDF5 predefined types
@@ -316,11 +484,174 @@ def getTypeItem(dt):
 
     else:
         # unexpected kind
-        raise TypeError("unexpected dtype kind: " + dt.kind)
+        raise TypeError(f"unexpected dtype kind: {dt.kind}")
 
     return type_info
 
 
+def isVlen(dt):
+    """
+    Return True if the type contains variable length elements
+    """
+    is_vlen = False
+    if len(dt):
+        names = dt.names
+        for name in names:
+            if isVlen(dt[name]):
+                is_vlen = True
+                break
+    else:
+        if dt.base.metadata and "vlen" in dt.base.metadata:
+            is_vlen = True
+    return is_vlen
+
+
+def isOpaqueDtype(dt):
+    """
+    Return True if this is an opaque dtype
+    """
+    if dt.kind == "V" and len(dt) == 0 and len(dt.shape) == 0 and not dt.names:
+        return True
+    if dt.metadata and dt.metadata.get('h5py_opaque'):
+        return True
+    return False
+
+
+def getItemSize(typeItem):
+    """
+    Get size of an item in bytes.
+        For variable length types (e.g. variable length strings),
+        return the string "H5T_VARIABLE"
+    """
+    # handle the case where we are passed a primitive type first
+    if isinstance(typeItem, str) or isinstance(typeItem, bytes):
+        for type_prefix in ("H5T_STD_I", "H5T_STD_U", "H5T_IEEE_F"):
+            if typeItem.startswith(type_prefix):
+                nlen = len(type_prefix)
+                num_bits = typeItem[nlen:]
+                if num_bits[-2:] in ("LE", "BE"):
+                    num_bits = num_bits[:-2]
+                try:
+                    return int(num_bits) // 8
+                except ValueError:
+                    raise TypeError("Invalid Type")
+        # none of the expect primative types mathched
+        raise TypeError("Invalid Type")
+    if not isinstance(typeItem, dict):
+        raise TypeError("invalid type")
+
+    item_size = 0
+    if "class" not in typeItem:
+        raise KeyError("'class' not provided")
+    typeClass = typeItem["class"]
+
+    if typeClass == "H5T_INTEGER":
+        if "base" not in typeItem:
+            raise KeyError("'base' not provided")
+        item_size = getItemSize(typeItem["base"])
+
+    elif typeClass == "H5T_FLOAT":
+        if "base" not in typeItem:
+            raise KeyError("'base' not provided")
+        item_size = getItemSize(typeItem["base"])
+
+    elif typeClass == "H5T_STRING":
+        if "length" not in typeItem:
+            raise KeyError("'length' not provided")
+        item_size = typeItem["length"]
+
+    elif typeClass == "H5T_VLEN":
+        item_size = "H5T_VARIABLE"
+    elif typeClass == "H5T_OPAQUE":
+        if "size" not in typeItem:
+            raise KeyError("'size' not provided")
+        item_size = int(typeItem["size"])
+
+    elif typeClass == "H5T_ARRAY":
+        if "dims" not in typeItem:
+            raise KeyError("'dims' must be provided for array types")
+        if "base" not in typeItem:
+            raise KeyError("'base' not provided")
+        item_size = getItemSize(typeItem["base"])
+
+    elif typeClass == "H5T_ENUM":
+        if "base" not in typeItem:
+            raise KeyError("'base' must be provided for enum types")
+        item_size = getItemSize(typeItem["base"])
+
+    elif typeClass == "H5T_REFERENCE":
+        if "length" in typeItem:
+            item_size = typeItem["length"]
+        elif "base" in typeItem and typeItem["base"] == "H5T_STD_REF_OBJ":
+            # obj ref values are in the form: "groups/<id>" or
+            # "datasets/<id>" or "datatypes/<id>"
+            item_size = 48
+        else:
+            item_size = 80  # tb: just take a guess at this for now
+    elif typeClass == "H5T_COMPOUND":
+        if "fields" not in typeItem:
+            raise KeyError("'fields' not provided for compound type")
+        fields = typeItem["fields"]
+        if not isinstance(fields, list):
+            raise TypeError("Type Error: expected list type for 'fields'")
+        if not fields:
+            raise KeyError("no 'field' elements provided")
+        # add up the size of each sub-field
+        for field in fields:
+            if not isinstance(field, dict):
+                raise TypeError("Expected dictionary type for field")
+            if "type" not in field:
+                raise KeyError("'type' missing from field")
+            subtype_size = getItemSize(field["type"])  # recursive call
+            if subtype_size == "H5T_VARIABLE":
+                item_size = "H5T_VARIABLE"
+                break  # don't need to look at the rest
+
+            item_size += subtype_size
+    else:
+        raise TypeError("Invalid type class")
+
+    # calculate array type
+    if "dims" in typeItem and isinstance(item_size, int):
+        dims = typeItem["dims"]
+        for dim in dims:
+            item_size *= dim
+
+    return item_size
+
+
+def getDtypeItemSize(dtype):
+    """ Return size of dtype in bytes
+        For variable length types (e.g. variable length strings),
+        return the string "H5T_VARIABLE
+    """
+    item_size = 0
+    if len(dtype):
+        # compound dtype
+        for i in range(len(dtype)):
+            sub_dt = dtype[i]
+            sub_dt_size = getDtypeItemSize(sub_dt)
+            if sub_dt_size == "H5T_VARIABLE":
+                item_size = "H5T_VARIABLE"  # return variable if any component is variable
+                break
+            item_size += sub_dt_size
+    else:
+        # primitive type
+        if dtype.shape:
+            base_size = getDtypeItemSize(dtype.base)
+            if base_size == "H5T_VARIABLE":
+                item_size = "H5T_VARIABLE"
+            else:
+                nelements = np.prod(dtype.shape)
+                item_size = base_size * nelements
+        else:
+            if dtype.metadata and "vlen" in dtype.metadata:
+                item_size = "H5T_VARIABLE"
+            else:
+                item_size = dtype.itemsize
+    return item_size
+
+
 def getNumpyTypename(hdf5TypeName, typeClass=None):
     predefined_int_types = {
         "H5T_STD_I8": "i1",
@@ -332,7 +663,11 @@ def getNumpyTypename(hdf5TypeName, typeClass=None):
         "H5T_STD_I64": "i8",
         "H5T_STD_U64": "u8",
     }
-    predefined_float_types = {"H5T_IEEE_F32": "f4", "H5T_IEEE_F64": "f8"}
+    predefined_float_types = {
+        "H5T_IEEE_F16": "f2",
+        "H5T_IEEE_F32": "f4",
+        "H5T_IEEE_F64": "f8",
+    }
 
     if len(hdf5TypeName) < 3:
         raise Exception("Type Error: invalid typename: ")
@@ -356,7 +691,6 @@ def getNumpyTypename(hdf5TypeName, typeClass=None):
 
 
 def createBaseDataType(typeItem):
-
     dtRet = None
     if isinstance(typeItem, str):
         # should be one of the predefined types
@@ -371,20 +705,32 @@ def createBaseDataType(typeItem):
         raise KeyError("'class' not provided")
     typeClass = typeItem["class"]
 
+    dims = ""
+    if "dims" in typeItem:
+        if typeClass != "H5T_ARRAY":
+            raise TypeError("'dims' only supported for integer types")
+
+        dims = None
+        if isinstance(typeItem["dims"], int):
+            dims = typeItem["dims"]  # make into a tuple
+        elif not isinstance(typeItem["dims"], list) and not isinstance(
+            typeItem["dims"], tuple
+        ):
+            raise TypeError("expected list or integer for dims")
+        else:
+            dims = typeItem["dims"]
+        dims = str(tuple(dims))
+
     if typeClass == "H5T_INTEGER":
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for integer types")
         baseType = getNumpyTypename(typeItem["base"], typeClass="H5T_INTEGER")
-        dtRet = np.dtype(baseType)
+        dtRet = np.dtype(dims + baseType)
     elif typeClass == "H5T_FLOAT":
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for floating point types")
         baseType = getNumpyTypename(typeItem["base"], typeClass="H5T_FLOAT")
-        dtRet = np.dtype(baseType)
+        dtRet = np.dtype(dims + baseType)
     elif typeClass == "H5T_STRING":
         if "length" not in typeItem:
             raise KeyError("'length' not provided")
@@ -392,8 +738,9 @@ def createBaseDataType(typeItem):
             raise KeyError("'charSet' not provided")
 
         if typeItem["length"] == "H5T_VARIABLE":
-            if "dims" in typeItem:
-                raise TypeError("'dims' not supported for variable types")
+            if dims:
+                msg = "ArrayType is not supported for variable len types"
+                raise TypeError(msg)
             if typeItem["charSet"] == "H5T_CSET_ASCII":
                 dtRet = special_dtype(vlen=bytes)
             elif typeItem["charSet"] == "H5T_CSET_UTF8":
@@ -408,20 +755,25 @@ def createBaseDataType(typeItem):
             if typeItem["charSet"] == "H5T_CSET_ASCII":
                 type_code = "S"
             elif typeItem["charSet"] == "H5T_CSET_UTF8":
-                raise TypeError("fixed-width unicode strings are not supported")
+                # use the same type_code as ascii strings
+                # (otherwise, numpy will reserve bytes for UTF32 representation)
+                type_code = "S"
             else:
                 raise TypeError("unexpected 'charSet' value")
-            dtRet = np.dtype(type_code + str(nStrSize))  # fixed size string
+            # a fixed size string
+            dtRet = np.dtype(dims + type_code + str(nStrSize))
     elif typeClass == "H5T_VLEN":
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for vlen types")
+        if dims:
+            msg = "ArrayType is not supported for variable len types"
+            raise TypeError(msg)
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
         baseType = createBaseDataType(typeItem["base"])
         dtRet = special_dtype(vlen=np.dtype(baseType))
     elif typeClass == "H5T_OPAQUE":
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for opaque types")
+        if dims:
+            msg = "Opaque Type is not supported for variable len types"
+            raise TypeError(msg)
         if "size" not in typeItem:
             raise KeyError("'size' not provided")
         nSize = int(typeItem["size"])
@@ -429,26 +781,19 @@ def createBaseDataType(typeItem):
             raise TypeError("'size' must be non-negative")
         dtRet = np.dtype("V" + str(nSize))
     elif typeClass == "H5T_ARRAY":
-        if "dims" not in typeItem:
+        if not dims:
             raise KeyError("'dims' must be provided for array types")
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
         arrayBaseType = typeItem["base"]
-        if type(arrayBaseType) is dict:
+        if isinstance(arrayBaseType, dict):
             if "class" not in arrayBaseType:
                 raise KeyError("'class' not provided for array base type")
-            if arrayBaseType["class"] not in (
-                "H5T_INTEGER",
-                "H5T_FLOAT",
-                "H5T_STRING",
-                "H5T_COMPOUND",
-            ):
-                raise TypeError(
-                    f"{arrayBaseType['class']}: H5T_ARRAY base type not supported."
-                )
-
-        dt_base = createDataType(arrayBaseType)
-
+            type_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING", "H5T_COMPOUND", "H5T_ARRAY")
+            if arrayBaseType["class"] not in type_classes:
+                msg = "Array Type base type must be integer, float, string, compound or array"
+                raise TypeError(msg)
+        baseType = createDataType(arrayBaseType)
         if isinstance(typeItem["dims"], int):
             dims = typeItem["dims"]  # make into a tuple
         elif type(typeItem["dims"]) not in (list, tuple):
@@ -457,11 +802,17 @@ def createBaseDataType(typeItem):
             dims = typeItem["dims"]
         # create an array type of the base type
 
-        dtRet = np.dtype((dt_base, dims))
-
+        dtRet = np.dtype((baseType, dims))
+        """
+        metadata = None
+        if baseType.metadata:
+            metadata = dict(baseType.metadata)
+            dtRet = np.dtype(dims + baseType.str, metadata=metadata)
+        else:
+            dtRet = np.dtype(dims + baseType.str)
+        return dtRet  # return predefined type
+        """
     elif typeClass == "H5T_REFERENCE":
-        if "dims" in typeItem:
-            raise TypeError("'dims' not supported for reference types")
         if "base" not in typeItem:
             raise KeyError("'base' not provided")
         if typeItem["base"] == "H5T_STD_REF_OBJ":
@@ -470,6 +821,7 @@ def createBaseDataType(typeItem):
             dtRet = special_dtype(ref=RegionReference)
         else:
             raise TypeError("Invalid base type for reference type")
+
     elif typeClass == "H5T_ENUM":
         if "base" not in typeItem:
             raise KeyError("Expected 'base' to be provided for enum type")
@@ -477,21 +829,36 @@ def createBaseDataType(typeItem):
         if "class" not in base_json:
             raise KeyError("Expected class field in base type")
         if base_json["class"] != "H5T_INTEGER":
-            raise TypeError("Only integer base types can be used with enum type")
-        if "members" not in typeItem:
-            raise KeyError("'members' not provided for enum type")
-        members = typeItem["members"]
-        if len(members) == 0:
-            raise KeyError("empty enum members")
+            msg = "Only integer base types can be used with enum type"
+            raise TypeError(msg)
+        if "mapping" in typeItem:
+            mapping = typeItem["mapping"]
+        elif "members" in typeItem:
+            mapping = typeItem["members"]  # backward-compatibility for hdf5-json
+        else:
+            raise KeyError("'mapping' not provided for enum type")
+
+        if len(mapping) == 0:
+            raise KeyError("empty enum map")
 
         dt = createBaseDataType(base_json)
-        values_dict = dict((m["name"], m["value"]) for m in members)
-        if (
-            dt.kind == "i"
-            and dt.name == "int8"
-            and len(members) == 2
-            and "TRUE" in values_dict
-            and "FALSE" in values_dict
+        if isinstance(mapping, list):
+            # convert to a dictionary
+            values_dict = dict((m["name"], m["value"]) for m in mapping)
+        elif isinstance(mapping, dict):
+            # just use as is
+            values_dict = mapping
+        else:
+            raise TypeError("Expected dict or list mapping for enum type")
+
+        if all(
+            (
+                dt.kind == "i",
+                dt.name == "int8",
+                len(mapping) == 2,
+                "TRUE" in values_dict,
+                "FALSE" in values_dict,
+            )
         ):
             # convert to numpy boolean type
             dtRet = np.dtype("bool")
@@ -505,14 +872,12 @@ def createBaseDataType(typeItem):
     return dtRet
 
 
-"""
-Create a numpy datatype given a json type
-"""
-
-
 def createDataType(typeItem):
+    """
+    Create a numpy datatype given a json type
+    """
     dtRet = None
-    if isinstance(typeItem, (str, bytes)):
+    if type(typeItem) in (str, bytes):
         # should be one of the predefined types
         dtName = getNumpyTypename(typeItem)
         dtRet = np.dtype(dtName)
@@ -543,20 +908,90 @@ def createDataType(typeItem):
             if "type" not in field:
                 raise KeyError("'type' missing from field")
             field_name = field["name"]
-            if isinstance(field_name, str):
-                # verify the field name is ascii
-                try:
-                    field_name.encode("ascii")
-                except UnicodeDecodeError:
-                    raise TypeError("non-ascii field name not allowed")
+            if not isinstance(field_name, str):
+                raise TypeError("field names must be strings")
+            # verify the field name is ascii
+            try:
+                field_name.encode("ascii")
+            except UnicodeEncodeError:
+                raise TypeError("non-ascii field name not allowed")
 
             dt = createDataType(field["type"])  # recursive call
             if dt is None:
                 raise Exception("unexpected error")
-            subtypes.append((field_name, dt))  # append tuple
+            subtypes.append((field["name"], dt))  # append tuple
 
         dtRet = np.dtype(subtypes)
-
     else:
         dtRet = createBaseDataType(typeItem)  # create non-compound dt
     return dtRet
+
+
+def validateTypeItem(typeItem):
+    """
+    Validate a json type - call createDataType and if no exception,
+       it's valid
+    """
+    createDataType(typeItem)
+    # throws KeyError, TypeError, or ValueError
+
+
+def getBaseTypeJson(type_name):
+    """
+    Return JSON representation of a predefined type string
+    """
+    predefined_int_types = (
+        "H5T_STD_I8",
+        "H5T_STD_U8",
+        "H5T_STD_I16",
+        "H5T_STD_U16",
+        "H5T_STD_I32",
+        "H5T_STD_U32",
+        "H5T_STD_I64",
+        "H5T_STD_U64",
+    )
+    predefined_float_types = ("H5T_IEEE_F16", "H5T_IEEE_F32", "H5T_IEEE_F64")
+    type_json = {}
+    # predefined typenames start with 'H5T' and end with "LE" or "BE"
+    if all(
+        (
+            type_name.startswith("H5T_"),
+            type_name[-1] == "E",
+            type_name[-2] in ("L", "B"),
+        )
+    ):
+        # trime of the "BE/"LE"
+        type_prefix = type_name[:-2]
+        if type_prefix in predefined_int_types:
+            type_json["class"] = "H5T_INTEGER"
+            type_json["base"] = type_name
+        elif type_prefix in predefined_float_types:
+            type_json["class"] = "H5T_FLOAT"
+            type_json["base"] = type_name
+        else:
+            raise TypeError("Invalid type name")
+    else:
+        raise TypeError("Invalid type name")
+    return type_json
+
+
+def getSubType(dt_parent, fields):
+    """ Return a dtype that is a compound type composed of
+        the fields given in the field_names list
+    """
+    if len(dt_parent) == 0:
+        raise TypeError("getSubType - parent must be compound type")
+    if not fields:
+        raise TypeError("null field specification")
+    if isinstance(fields, str):
+        fields = [fields,]  # convert to a list
+
+    field_names = set(dt_parent.names)
+    dt_items = []
+    for field in fields:
+        if field not in field_names:
+            raise TypeError(f"field: {field} is not defined in parent type")
+        dt_items.append((field, dt_parent[field]))
+    dt = np.dtype(dt_items)
+
+    return dt
diff --git a/src/h5json/hsdsstore/hsds_reader.py b/src/h5json/hsdsstore/hsds_reader.py
new file mode 100644
index 0000000..55a8c02
--- /dev/null
+++ b/src/h5json/hsdsstore/hsds_reader.py
@@ -0,0 +1,312 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import logging
+
+from ..objid import getCollectionForId, getUuidFromId
+
+from ..hdf5dtype import createDataType
+from ..array_util import jsonToArray, bytesToArray
+from .. import selections
+from ..h5reader import H5Reader
+from .httpconn import HttpConn
+
+
+class HSDSReader(H5Reader):
+    """
+    This class can be used by HDF5DB to read content from an hdf5-json file
+    """
+
+    def __init__(
+        self,
+        domain_path,
+        app_logger=None,
+        endpoint=None,
+        username=None,
+        password=None,
+        bucket=None,
+        api_key=None,
+        use_session=True,
+        expire_time=0,
+        max_objects=0,
+        max_age=0,
+        retries=3,
+        timeout=30.0,
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+        self.log.debug("HSDSReader init(")
+
+        kwargs = {}
+        self.log.debug(f"    domain_path: {domain_path}")
+        if endpoint:
+            self.log.debug(f"    endpoint: {endpoint}")
+            kwargs["endpoint"] = endpoint
+        if username:
+            self.log.debug(f"    username: {username}")
+            kwargs["username"] = username
+        if password:
+            self.log.debug(f"    password: {'*' * len(password)}")
+            kwargs["password"] = password
+        if bucket:
+            self.log.debug(f"    bucket: {bucket}")
+            kwargs["bucket"] = bucket
+        if api_key:
+            self.log.debug(f"    apI_key: {'*' * len(api_key)}")
+            kwargs["api_key"] = api_key
+        if use_session:
+            self.log.debug(f"    use_session: {use_session}")
+            kwargs["user_session"] = use_session
+
+        if expire_time:
+            self.log.debug(f"    expire_time: {expire_time}")
+            kwargs["expire_time"] = expire_time
+        if max_objects:
+            self.log.debug(f"    max_objects: {max_objects}")
+            kwargs["max_objects"] = max_objects
+        if max_age:
+            self.log.debug(f"    max_age: {max_age}")
+            kwargs["max_age"] = max_age
+        if retries:
+            self.log.debug(f"    retries: {retries}")
+            kwargs["retries"] = retries
+        if timeout:
+            self.log.debug(f"    timeout: {timeout}")
+            kwargs["timeout"] = timeout
+        # save these for when we create the connection
+        self._http_kwargs = kwargs
+        self._http_conn = None
+
+        super().__init__(domain_path, app_logger=app_logger)
+
+    def open(self):
+        if self._http_conn:
+            return  # open already called
+
+        kwargs = self._http_kwargs
+        http_conn = HttpConn(self.filepath, **kwargs)
+
+        hsds_info = http_conn.serverInfo()
+        self.log.debug(f"got hsds info: {hsds_info}")
+
+        # try to do a GET from the domain
+        req = "/"
+        params = {}
+        """
+        if max_objects is None or max_objects > 0:
+            # get object meta objects
+            # TBD: have hsds support a max limit of objects to return
+            params["getobjs"] = 1
+        params["include_attrs"] = 1
+        params["include_links"] = 1
+        """
+
+        rsp = http_conn.GET(req, params=params)
+
+        if rsp.status_code != 200:
+            # file must exist
+            http_conn.close()
+            raise IOError(rsp.status_code, rsp.reason)
+
+        domain_json = rsp.json()
+        self.log.debug(f"got domain_json: {domain_json}")
+
+        if "root" not in domain_json:
+            http_conn.close()
+            raise IOError(404, "Location is a folder, not a file")
+
+        root_id = domain_json["root"]
+        self._root_id = root_id
+
+        """
+        if "domain_objs" in root_json:
+            domain_objs = root_json["domain_objs"]
+            objdb.load(domain_objs)
+        """
+        if "limits" in domain_json:
+            self._limits = domain_json["limits"]
+        else:
+            self._limits = None
+        if "version" in domain_json:
+            self._version = domain_json["version"]
+        else:
+            self._version = None
+
+        self._http_conn = http_conn
+        self._domain_json = domain_json
+
+        return self._root_id
+
+    @property
+    def http_conn(self):
+        return self._http_conn
+
+    def close(self):
+        if self._http_conn:
+            self._http_conn.close()
+
+    def isClosed(self):
+        if self._http_conn:
+            return False
+        else:
+            return True
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
+
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True, include_values=False):
+        """ return object with given id """
+
+        collection = getCollectionForId(obj_id)
+
+        req = f"/{collection}/{obj_id}"
+        self.log.debug("sending req: {req}")
+
+        params = {}
+        if include_attrs:
+            params["include_attrs"] = 1
+        if include_links:
+            params["include_links"] = 1
+
+        rsp = self.http_conn.GET(req, params=params)
+
+        if rsp.status_code != 200:
+            raise IOError(rsp.status_code, rsp.reason)
+
+        obj_json = rsp.json()
+        # remove any unneeded keys
+        redundant_keys = ("hrefs", "root", "domain", "bucket", "linkCount", "attributeCount")
+        for key in redundant_keys:
+            if key in obj_json:
+                del obj_json[key]
+
+        self.log.debug(f"got json for id: {obj_id}: {obj_json}")
+        return obj_json
+
+    def getAttribute(self, obj_id, name, includeData=True):
+        """
+        Get attribute given an object id and name
+        returns: JSON object
+        """
+        self.log.debug(f"getAttribute({obj_id}), [{name}], include_data={includeData})")
+        collection = getCollectionForId(obj_id)
+        req = f"/{collection}/{obj_id}/attributes/{name}"
+
+        params = {}
+        params["IncludeData"] = 1 if includeData else 0
+
+        rsp = self.http_conn.GET(req, params=params)
+
+        if rsp.status_code in (404, 410):
+            self.log.warning(f"attribute {name} not found")
+            return None
+
+        if rsp.status_code != 200:
+            self.log.error(f"GET {req} failed with status_code: {rsp.status_code}")
+            raise IOError(rsp.status_code, rsp.reason)
+        attr_json = rsp.json()
+
+        if "hrefs" in attr_json:
+            del attr_json["hrefs"]
+
+        return attr_json
+
+    def getDtype(self, obj_json):
+        """ Return the dtype for the type given by obj_json """
+        if "type" not in obj_json:
+            raise KeyError("no type item found")
+        type_item = obj_json["type"]
+        if isinstance(type_item, str) and type_item.startswith("datatypes/"):
+            # this is a reference to a committed type
+            ctype_id = "t-" + getUuidFromId(type_item)
+            ctype_json = self.getObjectById(ctype_id)
+            if "type" not in ctype_json:
+                raise KeyError(f"Unexpected datatype: {ctype_json}")
+            # Use the ctype's item json
+            type_item = ctype_json["type"]
+        dtype = createDataType(type_item)
+        return dtype
+
+    def getDatasetValues(self, dset_id, sel=None, dtype=None):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+
+        self.log.debug(f"getDatasetValues({dset_id}), sel={sel}")
+        collection = getCollectionForId(dset_id)
+        if collection != "datasets":
+            msg = f"unexpected id: {dset_id} for getDatasetValues"
+            self.log.warning(msg)
+            return ValueError(msg)
+
+        if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
+            query_param = None  # just return the entire array
+        elif isinstance(sel, (selections.SimpleSelection, selections.FancySelection)):
+            query_param = sel.getQueryParam()
+        else:
+            raise NotImplementedError(f"selection type: {type(sel)} not supported")
+
+        mtype = dtype  # TBD - support read time dtype
+        mshape = sel.mshape
+
+        req = f"/{collection}/{dset_id}/value"
+        params = {}
+
+        if query_param:
+            params["select"] = query_param
+
+        if mtype.names != dtype.names:
+            params["fields"] = ":".join(mtype.names)
+
+        MAX_SELECT_QUERY_LEN = 100
+        if len(query_param) > MAX_SELECT_QUERY_LEN:
+            # use a post method to avoid possible long query strings
+            try:
+                rsp = self.http_conn.POST(req, body=params, format="binary")
+            except IOError as ioe:
+                self.log.info(f"got IOError: {ioe.errno}")
+                raise IOError(f"Error retrieving data: {ioe.errno}")
+        else:
+            # make a http GET
+            try:
+                rsp = self.http_conn.GET(req, params=params, format="binary")
+            except IOError as ioe:
+                self.log.info(f"got IOError: {ioe.errno}")
+                raise IOError(ioe.errno, "Error retrieving data")
+
+        if rsp.status_code != 200:
+            self.log.info(f"got http error: {rsp.status_code}")
+            raise IOError(rsp.status_code, "Error retrieving data")
+
+        if rsp.is_binary:
+            # got binary response
+            self.log.info(f"binary response, {len(rsp.text)} bytes")
+            arr = bytesToArray(rsp.text, mtype, mshape)
+        else:
+            # got JSON response
+            # need some special conversion for compound types --
+            # each element must be a tuple, but the JSON decoder
+            # gives us a list instead.
+            self.log.info("json response")
+
+            data = rsp.json()["value"]
+            # self.log.debug(data)
+
+            arr = jsonToArray(mshape, mtype, data)
+            self.log.debug(f"jsonToArray returned: {arr}")
+
+        return arr
diff --git a/src/h5json/hsdsstore/hsds_writer.py b/src/h5json/hsdsstore/hsds_writer.py
new file mode 100644
index 0000000..c4a7c39
--- /dev/null
+++ b/src/h5json/hsdsstore/hsds_writer.py
@@ -0,0 +1,322 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import logging
+import time
+
+from ..objid import getCollectionForId, getUuidFromId
+
+from ..hdf5dtype import createDataType
+from ..array_util import jsonToArray, bytesToArray
+from .. import selections
+from ..h5writer import H5Writer
+from .httpconn import HttpConn
+
+
+class HSDSWriter(H5Writer):
+    """
+    This class can be used by HDF5DB to read content from an hdf5-json file
+    """
+
+    def __init__(
+        self,
+        domain_path,
+        append=False,
+        no_data=False,
+        app_logger=None,
+        endpoint=None,
+        username=None,
+        password=None,
+        bucket=None,
+        api_key=None,
+        use_session=True,
+        expire_time=0,
+        max_objects=0,
+        max_age=0,
+        retries=3,
+        timeout=30.0,
+        track_order=False,
+        owner=None,
+        linked_domain=None
+
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+        if append:
+            self._init = False
+        else:
+            self._init = True
+
+        if no_data:
+            self._no_data = True
+        else:
+            self._no_data = False
+
+        self.log.debug("HSDSWriter init")
+
+        kwargs = {}
+        self.log.debug(f"    domain_path: {domain_path}")
+        self.log.debug(f"    append: {append}")
+        if endpoint:
+            self.log.debug(f"    endpoint: {endpoint}")
+            kwargs["endpoint"] = endpoint
+        if username:
+            self.log.debug(f"    username: {username}")
+            kwargs["username"] = username
+        if password:
+            self.log.debug(f"    password: {'*' * len(password)}")
+            kwargs["password"] = password
+        if bucket:
+            self.log.debug(f"    bucket: {bucket}")
+            kwargs["bucket"] = bucket
+        if api_key:
+            self.log.debug(f"    apI_key: {'*' * len(api_key)}")
+            kwargs["api_key"] = api_key
+        if use_session:
+            self.log.debug(f"    use_session: {use_session}")
+            kwargs["user_session"] = use_session
+        if expire_time:
+            self.log.debug(f"    expire_time: {expire_time}")
+            kwargs["expire_time"] = expire_time
+        if max_objects:
+            self.log.debug(f"    max_objects: {max_objects}")
+            kwargs["max_objects"] = max_objects
+        if max_age:
+            self.log.debug(f"    max_age: {max_age}")
+            kwargs["max_age"] = max_age
+        if retries:
+            self.log.debug(f"    retries: {retries}")
+            kwargs["retries"] = retries
+        if timeout:
+            self.log.debug(f"    timeout: {timeout}")
+            kwargs["timeout"] = timeout
+        self._http_kwargs = kwargs  # save for when we create the connection
+
+        super().__init__(domain_path, app_logger=app_logger)
+
+        self._http_conn = None
+        self._root_id = None
+        self._append = append
+        self._owner = owner
+        self._track_order = track_order
+        self._linked_domain = linked_domain
+        self._domain_json = None
+        self._last_flush_time = 0
+
+    def open(self):
+        """ setup domain for writing """
+
+        if self._http_conn:
+            http_conn = self._http_conn
+        else:
+            kwargs = self._http_kwargs
+            http_conn = HttpConn(self.filepath, **kwargs)
+            if self._append:
+                http_conn._mode = "a"
+            self._http_conn = http_conn
+            hsds_info = http_conn.serverInfo()
+            self.log.debug(f"got hsds info: {hsds_info}")
+
+        if not self._domain_json:
+            # haven't fetched the domain json yet, do it now
+
+            # try to do a GET from the domain
+            req = "/"
+            params = {}
+            """
+            if max_objects is None or max_objects > 0:
+                # get object meta objects
+                # TBD: have hsds support a max limit of objects to return
+                params["getobjs"] = 1
+                params["include_attrs"] = 1
+                params["include_links"] = 1
+            """
+
+            domain_json = None
+            rsp = http_conn.GET(req, params=params)
+
+            if rsp.status_code not in (200, 404, 410):
+                msg = f"Got status code: {rsp.status_code} on initial domain get"
+                self.log.warning(msg)
+                raise IOError(msg)
+
+            if rsp.status_code == 200:
+                if self._append:
+                    # domain exists already
+                    domain_json = rsp.json()
+                    if "root" not in domain_json:
+                        # this a folder not a domain
+                        self.log.warning(f"folder: {self.filepath} has no root property")
+                        http_conn.close()
+                        raise IOError(404, "Location is a folder, not a file")
+                else:
+                    # not append - delete existing domain
+                    self.log.info(f"sending delete request for {self.filepath}")
+                    delete_rsp = http_conn.DELETE(req, params=params)
+                    if delete_rsp.status_code not in (200, 410):
+                        # failed to delete
+                        http_conn.close()
+                        raise IOError(rsp.status_code, rsp.reason)
+
+            if not domain_json:
+                # domain doesn't exist, create it
+                body = {}
+                if self.db.root_id:
+                    # initialize domain using the db's root_id
+                    body["root_id"] = self.db.root_id
+                if self._owner:
+                    body["owner"] = self._owner
+                if self._linked_domain:
+                    body["linked_domain"] = self._linked_domain
+                if self._track_order:
+                    create_props = {"CreateOrder": 1}
+                    group_body = {"creationProperties": create_props}
+                    body["group"] = group_body
+                rsp = http_conn.PUT(req, params=params, body=body)
+                if rsp.status_code != 201:
+                    http_conn.close()
+                    raise IOError(rsp.status_code, rsp.reason)
+                domain_json = rsp.json()
+                self.log.info(f"got rsp on PUT domain: {domain_json}")
+                if "root" not in domain_json:
+                    http_conn.close()
+                    raise IOError(404, "Unexpected error")
+
+            self.log.debug(f"got domain_json: {domain_json}")
+
+            if "root" not in domain_json:
+                http_conn.close()
+                raise IOError(404, "Location is a folder, not a file")
+
+            root_id = domain_json["root"]
+
+            self._root_id = root_id
+
+            if "limits" in domain_json:
+                self._limits = domain_json["limits"]
+            else:
+                self._limits = None
+            if "version" in domain_json:
+                self._version = domain_json["version"]
+            else:
+                self._version = None
+
+            self._domain_json = domain_json
+
+        return self._root_id
+
+    @property
+    def http_conn(self):
+        return self._http_conn
+
+    def createObjects(self, obj_ids):
+        MAX_OBJECTS_PER_REQUEST = 1
+        collections = ("groups", "datasets", "datatypes")
+        col_items = {}
+        for collection in collections:
+            col_items[collection] = []
+
+        for obj_id in obj_ids:
+            if obj_id == self._root_id:
+                continue  # this was created when the domain was
+            collection = getCollectionForId(obj_id)
+            obj_json = self.db.getObjectById(obj_id)
+            item = {"id": obj_id}
+            for key in ("links", "attributes"):
+                if key in obj_json:
+                    item[key] = obj_json[key]
+            items = col_items[collection]
+            items.append(item)
+            if len(items) == MAX_OBJECTS_PER_REQUEST:
+                print("items:", items)
+                post_rsp = self.http_conn.POST("/" + collection, items)
+                print("post_rsp.status_code:", post_rsp.status_code)
+                if post_rsp.is_json:
+                    print("post_rsp.json:", post_rsp.json())
+                items.clear()
+
+        # handle any remainder items
+        for collection in collections:
+            items = col_items[collection]
+            if items:
+                self.http_conn.POST("/" + collection, items)
+
+    def updateLinks(self, grp_ids):
+        """ update any modified links of the given objects """
+
+        print("updateLinks:", grp_ids)
+        body = {}  # body will hold a map of grp ids to link lists
+
+        for grp_id in grp_ids:
+            if getCollectionForId(grp_id) != "groups":
+                continue  # ignore datasets and datatypes
+            grp_json = self.db.getObjectById(grp_id)
+            grp_links = grp_json["links"]
+            print(f"grp_id {grp_id} links: {grp_links}")
+            for link_json in grp_links:
+                if "created" not in link_json:
+                    self.log.error(f"hsds_writer> expected created timestamp in link: {link_json}")
+                created = link_json["created"]
+                if created > self._last_flush_time:
+                    # new link, add to our list
+                    if grp_id not in body:
+                        body[grp_id] = {}
+
+        if body:
+            print("updateLinks, body:", body)
+
+    def flush(self):
+        """ Write dirty items """
+
+        if not self.db:
+            # no db set yet
+            return False
+        self.log.info("hsds_writer.flush()")
+        self.log.debug(f"    new object count: {len(self.db.new_objects)}")
+        self.log.debug(f"    dirty object count: {len(self.db.dirty_objects)}")
+        self.log.debug(f"    deleted object count: {len(self.db.deleted_objects)}")
+
+        if self._init:
+            # initialize all existing objects
+            self.log.debug(f"flush -- init is true, self.db: {self.db.db}")
+            for obj_id in self.db:
+                self.log.debug(f"init: {obj_id}")
+            self.createObjects(self.db.db.keys())
+            self._init = False
+        elif self.db.new_objects:
+            for obj_id in self.db.new_objects:
+                self.log.debug(f"new obj id: {obj_id}")
+            self.createObjects(self.db.new_objects)
+
+        for obj_id in self.db.dirty_objects:
+            self.log.debug(f"dirty object id: {obj_id}")
+            self.updateLinks(self.db.dirty_objects)
+
+        for obj_id in self.db.deleted_objects:
+            self.log.debug(f"deleted object: {obj_id}")
+
+        self._last_flush_time = time.time()
+        return True  # all objects written successfully
+
+    def close(self):
+        # over-ride of H5Writer method
+        self.flush()
+
+    def isClosed(self):
+        """ return closed status """
+        return False if self._http_conn else True
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
diff --git a/src/h5json/hsdsstore/httpconn.py b/src/h5json/hsdsstore/httpconn.py
new file mode 100644
index 0000000..14b3d54
--- /dev/null
+++ b/src/h5json/hsdsstore/httpconn.py
@@ -0,0 +1,808 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 REST Server) Service, Libraries and        #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+from __future__ import absolute_import
+
+import os
+import sys
+import time
+import base64
+
+import requests
+import requests_unixsocket
+from requests import ConnectionError
+from requests.adapters import HTTPAdapter, Retry
+import json
+import logging
+
+from .. import openid
+from .. import config
+
+
+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
+
+DEFAULT_TIMEOUT = (
+    10,
+    1000,
+)  # #20  # 180  # seconds - allow time for hsds service to bounce
+
+"""
+def verifyCert(self):
+    # default to validate CERT for https requests, unless
+    # the H5PYD_VERIFY_CERT environment variable is set and True
+    #
+    # TBD: set default to True once the signing authority of data.hdfgroup.org is
+    # recognized
+    if "H5PYD_VERIFY_CERT" in os.environ:
+        verify_cert = os.environ["H5PYD_VERIFY_CERT"].upper()
+        if verify_cert.startswith('F'):
+            return False
+    return True
+"""
+
+
+def getAzureApiKey():
+    """construct API key for Active Directory if configured"""
+    # TBD: GoogleID?
+
+    api_key = None
+
+    # if Azure AD ids are set, pass them to HttpConn via api_key dict
+    cfg = config.get_config()  # pulls in state from a .hscfg file (if found).
+
+    ad_app_id = None  # Azure AD HSDS Server id
+    if "HS_AD_APP_ID" in os.environ:
+        ad_app_id = os.environ["HS_AD_APP_ID"]
+    elif "hs_ad_app_id" in cfg:
+        ad_app_id = cfg["hs_ad_app_id"]
+    ad_tenant_id = None  # Azure AD tenant id
+    if "HS_AD_TENANT_ID" in os.environ:
+        ad_tenant_id = os.environ["HS_AD_TENANT_ID"]
+    elif "hs_ad_tenant_id" in cfg:
+        ad_tenant_id = cfg["hs_ad_tenant_id"]
+
+    ad_resource_id = None  # Azure AD resource id
+    if "HS_AD_RESOURCE_ID" in os.environ:
+        ad_resource_id = os.environ["HS_AD_RESOURCE_ID"]
+    elif "hs_ad_resource_id" in cfg:
+        ad_resource_id = cfg["hs_ad_resource_id"]
+
+    ad_client_secret = None  # Azure client secret
+    if "HS_AD_CLIENT_SECRET" in os.environ:
+        ad_client_secret = os.environ["HS_AD_CLIENT_SECRET"]
+    elif "hs_ad_client_secret" in cfg:
+        ad_client_secret = cfg["hs_ad_client_secret"]
+
+    if ad_app_id and ad_tenant_id and ad_resource_id:
+        # contruct dict to pass to HttpConn
+        api_key = {
+            "AD_APP_ID": ad_app_id,
+            "AD_TENANT_ID": ad_tenant_id,
+            "AD_RESOURCE_ID": ad_resource_id,
+            "openid_provider": "azure",
+        }
+        # optional config
+        if ad_client_secret:
+            api_key["AD_CLIENT_SECRET"] = ad_client_secret
+    return api_key  # None if AAD not configured
+
+
+def getKeycloakApiKey():
+    # check for keycloak next
+    cfg = config.get_config()  # pulls in state from a .hscfg file (if found).
+    api_key = None
+    # check to see if we are configured for keycloak authentication
+    if "HS_KEYCLOAK_URI" in os.environ:
+        keycloak_uri = os.environ["HS_KEYCLOAK_URI"]
+    elif "hs_keycloak_uri" in cfg:
+        keycloak_uri = cfg["hs_keycloak_uri"]
+    else:
+        keycloak_uri = None
+    if "HS_KEYCLOAK_CLIENT_ID" in os.environ:
+        keycloak_client_id = os.environ["HS_KEYCLOAK_CLIENT_ID"]
+    elif "hs_keycloak_client_id" in cfg:
+        keycloak_client_id = cfg["hs_keycloak_client_id"]
+    else:
+        keycloak_client_id = None
+    if "HS_KEYCLOAK_REALM" in os.environ:
+        keycloak_realm = cfg["HS_KEYCLOAK_REALM"]
+    elif "hs_keycloak_realm" in cfg:
+        keycloak_realm = cfg["hs_keycloak_realm"]
+    else:
+        keycloak_realm = None
+
+    if keycloak_uri and keycloak_client_id and keycloak_uri:
+        api_key = {
+            "keycloak_uri": keycloak_uri,
+            "keycloak_client_id": keycloak_client_id,
+            "keycloak_realm": keycloak_realm,
+            "openid_provider": "keycloak",
+        }
+    return api_key
+
+
+class HttpResponse:
+    """ wrapper for http request responses """
+    def __init__(self, rsp, logger=None):
+        self._rsp = rsp
+        self._logger = logger
+        if logger is None:
+            self.log = logging
+        else:
+            self.log = logging.getLogger(logger)
+        self._text = None
+
+    @property
+    def status_code(self):
+        """ return response status code """
+        return self._rsp.status_code
+
+    @property
+    def reason(self):
+        """ return response reason """
+        return self._rsp.reason
+
+    @property
+    def content_type(self):
+        """ return content type """
+        rsp = self._rsp
+        if 'Content-Type' in rsp.headers:
+            content_type = rsp.headers['Content-Type']
+        else:
+            content_type = ""
+        return content_type
+
+    @property
+    def content_length(self):
+        """ Return length of response if available """
+        if 'Content-Length' in self._rsp.headers:
+            content_length = self._rsp.headers['Content-Length']
+        else:
+            content_length = None
+        return content_length
+
+    @property
+    def is_binary(self):
+        """ return True if the response indicates binary data """
+
+        if self.content_type == "application/octet-stream":
+            return True
+        else:
+            return False
+
+    @property
+    def is_json(self):
+        """ return true if response indicates json """
+
+        if self.content_type.startswith("application/json"):
+            return True
+        else:
+            return False
+
+    @property
+    def text(self):
+        """ getresponse content as bytes """
+
+        if not self._text:
+            rsp = self._rsp
+            if not self.is_binary:
+                # hex encoded response?
+                # this is returned by API Gateway for lambda responses
+                self._text = bytes.fromhex(rsp.text)
+            else:
+                if self.content_length:
+                    self.log.debug(f"got binary response, {self.content_length} bytes")
+                else:
+                    self.log.debug("got binary response, content_length unknown")
+
+                HTTP_CHUNK_SIZE = 4096
+                http_chunks = []
+                downloaded_bytes = 0
+                for http_chunk in rsp.iter_content(chunk_size=HTTP_CHUNK_SIZE):
+                    if http_chunk:  # filter out keep alive chunks
+                        self.log.debug(f"got http_chunk - {len(http_chunk)} bytes")
+                        downloaded_bytes += len(http_chunk)
+                        http_chunks.append(http_chunk)
+                if len(http_chunks) == 0:
+                    raise IOError("no data returned")
+                if len(http_chunks) == 1:
+                    # can return first and only chunk as response
+                    self._text = http_chunks[0]
+                else:
+                    msg = f"retrieved {len(http_chunks)} http_chunks "
+                    msg += f" {downloaded_bytes} total bytes"
+                    self.log.info(msg)
+                    self._text = bytearray(downloaded_bytes)
+                    index = 0
+                    for http_chunk in http_chunks:
+                        self._text[index:(index + len(http_chunk))] = http_chunk
+                        index += len(http_chunk)
+
+        return self._text
+
+    def json(self):
+        """ Return json from response"""
+
+        rsp = self._rsp
+
+        if not self.is_json:
+            raise IOError("response is not json")
+
+        rsp_json = json.loads(rsp.text)
+        self.log.debug(f"rsp_json - {len(rsp.text)} bytes")
+        return rsp_json
+
+
+class HttpConn:
+    """
+    Some utility methods based on equivalents in base class.
+    """
+
+    def __init__(
+        self,
+        domain_name,
+        endpoint=None,
+        username=None,
+        password=None,
+        bucket=None,
+        api_key=None,
+        mode="a",
+        use_session=True,
+        expire_time=1.0,
+        max_objects=None,
+        max_age=1.0,
+        logger=None,
+        retries=3,
+        timeout=DEFAULT_TIMEOUT,
+        **kwds,
+    ):
+        self._domain = domain_name
+        self._mode = mode
+        self._domain_json = None
+        self._use_session = use_session
+        self._retries = retries
+        self._timeout = timeout
+        self._api_key = api_key
+        self._s = None  # Sessions
+        self._server_info = None
+        self._external_refs = []
+
+        self._logger = logger
+        if logger is None:
+            self.log = logging
+        else:
+            self.log = logging.getLogger(logger)
+        msg = f"HttpConn.init(domain: {domain_name} use_session: {use_session} "
+        msg += f"expire_time: {expire_time:6.2f} sec retries: {retries}"
+        self.log.debug(msg)
+
+        if self._timeout != DEFAULT_TIMEOUT:
+            self.log.info(f"HttpConn.init - timeout = {self._timeout}")
+        if not endpoint:
+            if "HS_ENDPOINT" in os.environ:
+                endpoint = os.environ["HS_ENDPOINT"]
+
+        if not endpoint:
+            msg = "no endpoint set"
+            raise ValueError(msg)
+
+        self._endpoint = endpoint
+
+        if not username:
+            if "HS_USERNAME" in os.environ:
+                username = os.environ["HS_USERNAME"]
+        if isinstance(username, str) and (not username or username.upper() == "NONE"):
+            username = None
+        self._username = username
+
+        if not password:
+            if "HS_PASSWORD" in os.environ:
+                password = os.environ["HS_PASSWORD"]
+        if isinstance(password, str) and (not password or password.upper() == "NONE"):
+            password = None
+        self._password = password
+
+        if not bucket:
+            if "HS_BUCKET" in os.environ:
+                bucket = os.environ["HS_BUCKET"]
+            if isinstance(bucket, str) and (not bucket or bucket.upper() == "NONE"):
+                bucket = None
+        self._bucket = bucket
+
+        if api_key is None and "HS_API_KEY" in os.environ:
+            api_key = os.environ["HS_API_KEY"]
+        if isinstance(api_key, str) and (not api_key or api_key.upper() == "NONE"):
+            api_key = None
+        if not api_key:
+            api_key = getAzureApiKey()
+        if not api_key:
+            api_key = getKeycloakApiKey()
+
+        # Convert api_key to OpenIDHandler
+        if isinstance(api_key, dict):
+            # Maintain Azure-defualt backwards compatibility, but allow
+            # both environment variable and kwarg override.
+            provider = api_key.get("openid_provider", "azure")
+            if provider == "azure":
+                self.log.debug("creating OpenIDHandler for Azure")
+                self._api_key = openid.AzureOpenID(endpoint, api_key)
+            elif provider == "google":
+                self.log.debug("creating OpenIDHandler for Google")
+
+                config = api_key.get("client_secret", None)
+                scopes = api_key.get("scopes", None)
+                self._api_key = openid.GoogleOpenID(
+                    endpoint, config=config, scopes=scopes
+                )
+            elif provider == "keycloak":
+                self.log.debug("creating OpenIDHandler for Keycloak")
+
+                # for Keycloak, pass in username and password
+                self._api_key = openid.KeycloakOpenID(
+                    endpoint, config=api_key, username=username, password=password
+                )
+            else:
+                self.log.error(f"Unknown openid provider: {provider}")
+
+    def __del__(self):
+        if self._s:
+            self.log.debug("close session")
+            self._s.close()
+            self._s = None
+
+    def getHeaders(self, username=None, password=None, headers=None):
+
+        if headers is None:
+            headers = {}
+
+        # This should be the default - but explicitly set anyway
+        if "Accept-Encoding" not in headers:
+            headers['Accept-Encoding'] = "deflate, gzip"
+
+        elif "Authorization" in headers:
+            return headers  # already have auth key
+        if username is None:
+            username = self._username
+        if password is None:
+            password = self._password
+
+        if self._api_key:
+            self.log.debug("using api key")
+            # use OpenId handler to get a bearer token
+            token = ""
+
+            # Get a token, possibly refreshing if needed.
+            if isinstance(self._api_key, openid.OpenIDHandler):
+                token = self._api_key.token
+
+            # Token was provided as a string.
+            elif isinstance(self._api_key, str):
+                token = self._api_key
+
+            if token:
+                auth_string = b"Bearer " + token.encode("ascii")
+                headers["Authorization"] = auth_string
+        elif username is not None and password is not None:
+            self.log.debug(f"use basic auth with username: {username}")
+            auth_string = username + ":" + password
+            auth_string = auth_string.encode("utf-8")
+            auth_string = base64.b64encode(auth_string)
+            auth_string = b"Basic " + auth_string
+            headers["Authorization"] = auth_string
+        else:
+            self.log.debug("no auth header")
+            # no auth header
+            pass
+
+        return headers
+
+    def serverInfo(self):
+        if self._server_info:
+            return self._server_info
+
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+
+        # make an about request
+        rsp = self.GET("/about")
+        if rsp.status_code != 200:
+            raise IOError(rsp.status_code, rsp.reason)
+        server_info = rsp.json()
+        if server_info:
+            self._server_info = server_info
+        return server_info
+
+    def server_version(self):
+        server_info = self.serverInfo()
+        if "hsds_version" in server_info:
+            server_version = server_info["hsds_version"]
+        else:
+            # no standard way to get version for other implements...
+            server_version = None
+        return server_version
+
+    def verifyCert(self):
+        # default to validate CERT for https requests, unless
+        # the H5PYD_VERIFY_CERT environment variable is set and True
+        #
+        # TBD: set default to True once the signing authority of data.hdfgroup.org is
+        # recognized
+        if "H5PYD_VERIFY_CERT" in os.environ:
+            verify_cert = os.environ["H5PYD_VERIFY_CERT"].upper()
+            if verify_cert.startswith("F"):
+                return False
+        return True
+
+    def GET(self, req, format="json", params=None, headers=None):
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+        # check that domain is defined (except for some specific requests)
+        if req not in ("/domains", "/about", "/info", "/") and self._domain is None:
+            raise IOError(f"no domain defined: req: {req}")
+
+        rsp = None
+
+        headers = self.getHeaders(headers=headers)
+
+        if params is None:
+            params = {}
+        if "domain" not in params:
+            params["domain"] = self._domain
+        if "bucket" not in params and self._bucket:
+            params["bucket"] = self._bucket
+        if self._api_key and not isinstance(self._api_key, dict):
+            params["api_key"] = self._api_key
+        domain = params["domain"]
+        self.log.debug(f"GET: {req} [{domain}] bucket: {self._bucket}")
+
+        if format == "binary":
+            headers["accept"] = "application/octet-stream"
+
+        self.log.info(f"GET: {self._endpoint + req} [{params['domain']}] timeout: {self._timeout}")
+
+        for k in params:
+            if k != "domain":
+                v = params[k]
+                self.log.debug(f"GET params {k}:{v}")
+
+        try:
+            s = self.session
+            stream = True  # tbd  - config for no streaming?
+            ts = time.time()
+            rsp = s.get(
+                self._endpoint + req,
+                params=params,
+                headers=headers,
+                stream=stream,
+                timeout=self._timeout,
+                verify=self.verifyCert(),
+            )
+            elapsed = time.time() - ts
+            self.log.info(f"status: GET {rsp.status_code}, elapsed: {elapsed:.4f}")
+        except ConnectionError as ce:
+            self.log.error(f"connection error: {ce}")
+            raise IOError("Connection Error")
+        except Exception as e:
+            self.log.error(f"got {type(e)} exception: {e}")
+            raise IOError("Unexpected exception")
+
+        if rsp.status_code != 200:
+            self.log.warning(f"GET {req} returned status: {rsp.status_code}")
+
+        return HttpResponse(rsp)
+
+    def PUT(self, req, body=None, format="json", params=None, headers=None):
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+        if self._domain is None:
+            raise IOError("no domain defined")
+
+        if params:
+            self.log.info(f"PUT params: {params}")
+        else:
+            params = {}
+
+        if "domain" not in params:
+            params["domain"] = self._domain
+        if "bucket" not in params and self._bucket:
+            params["bucket"] = self._bucket
+        if self._api_key:
+            params["api_key"] = self._api_key
+
+        # verify the file was open for modification
+        if self._mode == "r":
+            raise IOError("Unable to create group (No write intent on file)")
+
+        # try to do a PUT to the domain
+
+        headers = self.getHeaders(headers=headers)
+
+        if format == "binary":
+            headers["Content-Type"] = "application/octet-stream"
+            # binary write
+            data = body
+        else:
+            headers["Content-Type"] = "application/json"
+            data = json.dumps(body)
+
+        self.log.info(f"PUT: {req} format: {format} [{len(data)} bytes]")
+
+        try:
+            s = self.session
+            ts = time.time()
+            rsp = s.put(
+                self._endpoint + req,
+                data=data,
+                headers=headers,
+                params=params,
+                verify=self.verifyCert(),
+            )
+            elapsed = time.time() - ts
+            self.log.info(f"status: PUT {rsp.status_code}, elapsed: {elapsed:.4f}")
+        except ConnectionError as ce:
+            self.log.error(f"connection error: {ce}")
+            raise IOError("Connection Error")
+
+        if rsp.status_code == 201 and req == "/":
+            self.log.info("clearing domain_json cache")
+            self._domain_json = None
+        if rsp.status_code not in (200, 201):
+            self.log.warning(f"got status code: {rsp.status_code} for PUT {req}")
+        self.log.info(f"PUT returning: {rsp}")
+
+        return HttpResponse(rsp)
+
+    def POST(self, req, body=None, format="json", params=None, headers=None):
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+        if self._domain is None:
+            raise IOError("no domain defined")
+
+        if params is None:
+            params = {}
+        if "domain" not in params:
+            params["domain"] = self._domain
+        if "bucket" not in params and self._bucket:
+            params["bucket"] = self._bucket
+        if self._api_key:
+            params["api_key"] = self._api_key
+
+        # verify we have write intent (unless this is a dataset point selection)
+        if req.startswith("/datasets/") and req.endswith("/value"):
+            point_sel = True
+        else:
+            point_sel = False
+        if self._mode == "r" and not point_sel:
+            raise IOError("Unable perform request (No write intent on file)")
+
+        # try to do a POST to the domain
+
+        headers = self.getHeaders(headers=headers)
+
+        if isinstance(body, bytes):
+            headers["Content-Type"] = "application/octet-stream"
+            data = body
+        else:
+            # assume json
+            try:
+                data = json.dumps(body)
+            except TypeError:
+                msg = f"Unable to convert {body} to json"
+                self.log.error(msg)
+                raise IOError("JSON encoding error")
+        if format == "binary":
+            # receive data as binary
+            headers["accept"] = "application/octet-stream"
+
+        self.log.info("POST: " + req)
+
+        try:
+            s = self.session
+            ts = time.time()
+            rsp = s.post(
+                self._endpoint + req,
+                data=data,
+                headers=headers,
+                params=params,
+                verify=self.verifyCert(),
+            )
+            elapsed = time.time() - ts
+            self.log.info(f"status: POST {rsp.status_code}, elapsed: {elapsed:.4f}")
+        except ConnectionError as ce:
+            self.log.warning(f"connection error: {ce}")
+            raise IOError(str(ce))
+
+        if rsp.status_code not in (200, 201):
+            self.log.error(f"got status_code: {rsp.status_code} for DELETE: {req}")
+
+        return HttpResponse(rsp)
+
+    def DELETE(self, req, params=None, headers=None):
+        if self._endpoint is None:
+            raise IOError("object not initialized")
+
+        if req not in ("/domains", "/") and self._domain is None:
+            raise IOError("no domain defined")
+        if params is None:
+            params = {}
+        if "domain" not in params:
+            params["domain"] = self._domain
+        if "bucket" not in params and self._bucket:
+            params["bucket"] = self._bucket
+        if self._api_key:
+            params["api_key"] = self._api_key
+
+        # verify we have write intent
+        if self._mode == "r":
+            raise IOError("Unable perform request (No write intent on file)")
+
+        # try to do a DELETE of the resource
+        headers = self.getHeaders(headers=headers)
+
+        self.log.info("DEL: " + req)
+        try:
+            s = self.session
+            ts = time.time()
+            rsp = s.delete(
+                self._endpoint + req,
+                headers=headers,
+                params=params,
+                verify=self.verifyCert(),
+            )
+            self.log.info(f"status: {rsp.status_code}")
+            elapsed = time.time() - ts
+            self.log.info(f"status: DELETE {rsp.status_code}, elapsed: {elapsed:.4f}")
+        except ConnectionError as ce:
+            self.log.error(f"connection error: {ce}")
+            raise IOError("Connection Error")
+
+        if rsp.status_code == 200 and req == "/":
+            self.log.info("clearing domain_json cache")
+            self._domain_json = None
+
+        if rsp.status_code != 200:
+            self.log.warning(f"got status_code: {rsp.status_code} for DELETE {req}")
+
+        return HttpResponse(rsp)
+
+    @property
+    def session(self):
+        # create a session object to re-use http connection when possible
+        s = requests
+        retries = self._retries
+        backoff_factor = 1
+        status_forcelist = (500, 502, 503, 504)
+
+        if self._use_session:
+            if self._s is None:
+                if self._endpoint.startswith("http+unix://"):
+                    self.log.debug(f"create unixsocket session: {self._endpoint}")
+                    s = requests_unixsocket.Session()
+                else:
+                    # regular request session
+                    s = requests.Session()
+
+                retry = Retry(
+                    total=retries,
+                    read=retries,
+                    connect=retries,
+                    backoff_factor=backoff_factor,
+                    status_forcelist=status_forcelist,
+                )
+
+                s.mount(
+                    "http://",
+                    HTTPAdapter(max_retries=retry, pool_connections=16, pool_maxsize=16),
+                )
+                s.mount(
+                    "https://",
+                    HTTPAdapter(max_retries=retry, pool_connections=16, pool_maxsize=16),
+                )
+                self._s = s
+            else:
+                s = self._s
+        return s
+
+    def add_external_ref(self, fid):
+        # this is used by the group class to keep references to external links open
+        if fid.__class__.__name__ != "FileID":
+            raise TypeError("add_external_ref, expected FileID type")
+        self._external_refs.append(fid)
+
+    def close(self):
+        if self._s:
+            self._s.close()
+            self._s = None
+
+    @property
+    def domain(self):
+        return self._domain
+
+    @property
+    def username(self):
+        return self._username
+
+    @property
+    def endpoint(self):
+        return self._endpoint
+
+    @property
+    def password(self):
+        return self._password
+
+    @property
+    def mode(self):
+        return self._mode
+
+    @property
+    def domain_json(self):
+        if self._domain_json is None:
+            rsp = self.GET("/")
+            if rsp.status_code != 200:
+                raise IOError(rsp.reason)
+            # assume JSON
+            self._domain_json = rsp.json()
+        return self._domain_json
+
+    @property
+    def root_uuid(self):
+        domain_json = self.domain_json
+        if "root" not in domain_json:
+            raise IOError("Unexpected response")
+        root_uuid = domain_json["root"]
+        return root_uuid
+
+    @property
+    def compressors(self):
+        compressors = []
+        if "compressors" in self.domain_json:
+            compressors = self.domain_json["compressors"]
+        if not compressors:
+            compressors = [
+                "gzip",
+            ]
+        return compressors
+
+    @property
+    def modified(self):
+        """Last modified time of the domain as a datetime object."""
+        domain_json = self.domain_json
+        if "lastModified" not in domain_json:
+            raise IOError("Unexpected response")
+        last_modified = domain_json["lastModified"]
+        return last_modified
+
+    @property
+    def created(self):
+        """Creation time of the domain"""
+        domain_json = self.domain_json
+        if "created" not in domain_json:
+            raise IOError("Unexpected response")
+        created = domain_json["created"]
+        return created
+
+    @property
+    def owner(self):
+        """username of creator of domain"""
+        domain_json = self.domain_json
+        username = None
+        if "owner" in domain_json:
+            # currently this is only available for HSDS
+            username = domain_json["owner"]
+        return username
+
+    @property
+    def logging(self):
+        """return name of logging handler"""
+        return self.log
diff --git a/src/h5json/jsonstore/__init__.py b/src/h5json/jsonstore/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/h5json/jsonstore/h5json_reader.py b/src/h5json/jsonstore/h5json_reader.py
new file mode 100644
index 0000000..40f8e5e
--- /dev/null
+++ b/src/h5json/jsonstore/h5json_reader.py
@@ -0,0 +1,217 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import json
+import logging
+
+from ..objid import getCollectionForId, getUuidFromId
+
+from ..hdf5dtype import createDataType
+from ..array_util import jsonToArray
+from .. import selections
+from ..h5reader import H5Reader
+
+
+class H5JsonReader(H5Reader):
+    """
+    This class can be used by HDF5DB to read content from an hdf5-json file
+    """
+
+    def __init__(
+        self,
+        filepath,
+        app_logger=None
+    ):
+        if app_logger:
+            self.log = app_logger
+        else:
+            self.log = logging.getLogger()
+
+        super().__init__(filepath, app_logger=app_logger)
+        self._root_id = None
+        self._h5json = None
+
+    def open(self):
+        if self._h5json:
+            return  # already read JSON file
+
+        with open(self.filepath) as f:
+            text = f.read()
+
+        # parse the json file
+        h5json = json.loads(text)
+
+        self._h5json = h5json
+
+        if "root" not in h5json:
+            raise Exception("no root key in input file")
+
+        self._root_id = "g-" + h5json["root"]
+        if self.db.root_id and self.db.root_id != self._root_id:
+            self.log.warning("h5json root id doesn't match db root id")
+            raise IOError("root id mismatch")
+
+        return self._root_id
+
+    def close(self):
+        pass
+
+    def isClosed(self):
+        return False if self._h5json else False
+
+    def get_root_id(self):
+        """ Return root id """
+        return self._root_id
+
+    def getObjectById(self, obj_id, include_attrs=True, include_links=True, include_values=False):
+        """ return object with given id """
+        collection = getCollectionForId(obj_id)
+        if collection not in self._h5json:
+            self.log.warning(f"getObjectById - collection: {collection} not found")
+            return None
+        json_objs = self._h5json[collection]
+        obj_uuid = getUuidFromId(obj_id)
+        if obj_uuid not in json_objs:
+            self.log.warning(f"getObjectById - {obj_id} not found")
+            return None
+        json_obj = json_objs[obj_uuid]
+
+        resp = {}
+        # selectively copy from the db dict
+        for k in json_obj:
+            for k in ("shape", "type", "cpl", "dcpl"):
+                if k in json_obj:
+                    resp[k] = json_obj[k]
+        if include_attrs and "attributes" in json_obj:
+            attrs = {}
+            attr_list = json_obj["attributes"]
+            for item in attr_list:
+                if "name" not in item:
+                    self.log.warning(f"expected to find name key for {obj_id} attributes")
+                    continue
+                name = item["name"]
+                attr = {}
+                if "type" not in item:
+                    raise KeyError(f"expected to find type key for attribute {name} of {obj_id}")
+                attr["type"] = item["type"]
+                if "shape" not in item:
+                    raise KeyError(f"expected to find shape key for attribute {name} of {obj_id}")
+                attr["shape"] = item["shape"]
+                if "value" in item:
+                    attr["value"] = item["value"]
+                attrs[name] = attr
+            resp["attributes"] = attrs
+
+        if include_links and "links" in json_obj:
+            links = {}
+            link_list = json_obj["links"]
+            for item in link_list:
+                if "title" not in item:
+                    self.log.warning(f"expected to find title key for {obj_id} links")
+                    continue
+                title = item["title"]
+                link = {}
+                for k in ("class", "file", "h5path"):
+                    if k in item:
+                        link[k] = item[k]
+                if "collection" in item:
+                    collection = item["collection"]
+                    if "id" not in item:
+                        self.log.warning(f"expected to find id key for {obj_id} link item")
+                        continue
+                    obj_uuid = item["id"]
+                    if collection == "groups":
+                        obj_id = "g-" + obj_uuid
+                    elif collection == "datasets":
+                        obj_id = "d-" + obj_uuid
+                    elif collection == "datatypes":
+                        obj_id = "t-" + obj_uuid
+                    else:
+                        self.log.warning(f"unexpected collection type: {collection}")
+                        continue
+                    item["id"] = obj_id
+                links[title] = item
+            resp["links"] = links
+
+        if include_values and collection == "datasets" and "value" in json_obj:
+            resp["value"] = json_obj["value"]
+
+        return resp
+
+    def getAttribute(self, obj_id, name, includeData=True):
+        """
+        Get attribute given an object id and name
+        returns: JSON object
+        """
+        self.log.debug(f"getAttribute({obj_id}), [{name}], include_data={includeData})")
+        json_obj = self.getObjectById(obj_id)
+        if json_obj is None:
+            return None
+        if "attributes" not in json_obj:
+            self.log.warning(f"obj: {obj_id} has no attributes collection")
+            return None
+        attributes = json_obj["attributes"]
+        if name not in attributes:
+            self.log.info(f"attr: [{name}] of {obj_id} not found")
+            return None
+        return attributes[name]
+
+    def getDtype(self, obj_json):
+        """ Return the dtype for the type given by obj_json """
+        if "type" not in obj_json:
+            raise KeyError("no type item found")
+        type_item = obj_json["type"]
+        if isinstance(type_item, str) and type_item.startswith("datatypes/"):
+            # this is a reference to a committed type
+            ctype_id = "t-" + getUuidFromId(type_item)
+            ctype_json = self.getObjectById(ctype_id)
+            if "type" not in ctype_json:
+                raise KeyError(f"Unexpected datatype: {ctype_json}")
+            # Use the ctype's item json
+            type_item = ctype_json["type"]
+        dtype = createDataType(type_item)
+        return dtype
+
+    def getDatasetValues(self, obj_id, sel=None, dtype=None):
+        """
+        Get values from dataset identified by obj_id.
+        If a slices list or tuple is provided, it should have the same
+        number of elements as the rank of the dataset.
+        """
+
+        self.log.debug(f"getDatasetValues({obj_id}), sel={sel}")
+        json_obj = self.getObjectById(obj_id, include_values=True)
+        if json_obj is None:
+            self.log.warning(f"no object found with id; {obj_id}")
+            return None
+
+        if "value" not in json_obj:
+            self.log.warning(f"value key not found for {obj_id}")
+            return None
+        json_value = json_obj["value"]
+        shape_json = json_obj["shape"]
+        if shape_json["class"] == "H5S_NULL":
+            self.log.warning("getDatasetValues called for null space object: {obj_id}")
+            return None
+        elif shape_json["class"] == "H5S_SCALAR":
+            dims = ()
+        else:
+            dims = shape_json["dims"]
+
+        arr = jsonToArray(dims, dtype, json_value)
+        if sel is None or sel.select_type == selections.H5S_SELECT_ALL:
+            pass  # just return the entire array
+        elif isinstance(sel, selections.SimpleSelection):
+            arr = arr[sel.slices]
+        else:
+            raise NotImplementedError("selection type not supported")
+
+        return arr
diff --git a/src/h5json/jsonstore/h5json_writer.py b/src/h5json/jsonstore/h5json_writer.py
new file mode 100644
index 0000000..92d3499
--- /dev/null
+++ b/src/h5json/jsonstore/h5json_writer.py
@@ -0,0 +1,293 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+import json
+
+from ..h5writer import H5Writer
+from ..objid import getUuidFromId, getCollectionForId, createObjId
+from ..array_util import bytesArrayToList
+from .. import selections
+
+
+class H5JsonWriter(H5Writer):
+    """
+    This abstract class defines properties and methods that the Hdf5db class uses for writing to an HDF5
+    compatible storage medium.
+    """
+
+    def __init__(
+        self,
+        filepath,
+        append=False,
+        no_data=False,
+        app_logger=None
+    ):
+        super().__init__(filepath, append=append, no_data=no_data, app_logger=app_logger)
+        if append:
+            raise ValueError("H5JsonWriter does not support append mode")
+        self.alias_db = {}
+        self.json = {}
+        self._root_id = None
+
+    def flush(self):
+        """ Write dirty items """
+        # json writer doesn't support incremental updates, so we'll wait
+        # for close to write out database
+        if not self._root_id:
+            msg = "flush called prior to open"
+            self.log.warning(msg)
+            raise IOError(msg)
+
+        self.log.info("flush")
+        return False
+
+    def open(self):
+        """ file open """
+        # no incremental updates with h5json writer, so just fetch the root_id here
+        if self.db.root_id:
+            self._root_id = self.db.root_id
+        else:
+            self._root_id = createObjId(obj_type="groups")
+        return self._root_id
+
+    def close(self):
+        """ close storage handle """
+        self.dumpFile()
+
+    def isClosed(self):
+        """ return closed status """
+        return False if self._root_id else True
+
+    def getAliasList(self, obj_id):
+        """ return list of alias """
+        if obj_id not in self.alias_db:
+            self.alias_db[obj_id] = []
+        return self.alias_db[obj_id]
+
+    def updateAliasList(self):
+        """ update the alias list for each object """
+        # clear exiting aliases
+        obj_ids = self.db.getCollection()
+        for obj_id in obj_ids:
+            self.alias_db[obj_id] = []
+
+        self._setAlias(self._root_uuid, set(), "/")
+
+    def _setAlias(self, obj_id, id_set, h5path):
+        """ add the given h5path to the object's alias list
+            If the object is a group, recurse through each hard link """
+        obj_json = self.db.getObjectById(obj_id)
+        alias_list = self.getAliasList(obj_id)
+        if h5path in alias_list:
+            return  # nothing to do
+        alias_list.append(h5path)
+        if getCollectionForId(obj_id) != "groups":
+            return  # done
+        id_set.add(obj_id)  # keep track of objects we've visited to avoid loops
+        links = obj_json["links"]
+        if h5path[-1] != '/':
+            h5path += '/'
+
+        for link_name in links:
+            link_json = links[link_name]
+            if link_json["class"] == "H5L_TYPE_HARD":
+                tgt_id = link_json["id"]
+                if tgt_id in id_set:
+                    self.log.info("_setAlias - circular loop found")
+                else:
+                    self._setAlias(tgt_id, id_set, f"{h5path}{link_name}")
+        id_set.remove(obj_id)
+
+    def dumpAttribute(self, obj_id, attr_name):
+        self.log.info(f"dumpAttribute: [{attr_name}]")
+        item = self.db.getAttribute(obj_id, attr_name)
+        response = {"name": attr_name}
+        response["type"] = item["type"]
+        response["shape"] = item["shape"]
+
+        if "value" not in item:
+            self.log.warning(f"no value key in attribute: {attr_name}")
+        else:
+            # dump values unless header -D was passed
+            response["value"] = item["value"]
+        return response
+
+    def dumpAttributes(self, obj_id):
+        attrs = self.db.getAttributes(obj_id)
+        self.log.info(f"dumpAttributes: {obj_id}")
+        items = []
+        for attr_name in attrs:
+            item = self.dumpAttribute(obj_id, attr_name)
+            items.append(item)
+
+        return items
+
+    def dumpLink(self, obj_id, name):
+        item = self.db.getLink(obj_id, name)
+        response = {"class": item["class"]}
+        if "id" in item:
+            tgt_id = item["id"]
+            response["collection"] = getCollectionForId(tgt_id)
+            response["id"] = getUuidFromId(tgt_id)
+
+        for key in item:
+            if key in ("id", "created", "modified"):
+                continue
+            response[key] = item[key]
+        response["title"] = name
+        return response
+
+    def dumpLinks(self, obj_id):
+        links = self.db.getLinks(obj_id)
+        items = []
+        for link_name in links:
+            item = self.dumpLink(obj_id, link_name)
+            items.append(item)
+        return items
+
+    def dumpGroup(self, obj_id):
+        item = self.db.getObjectById(obj_id)
+        response = {}
+
+        alias = self.getAliasList(obj_id)
+        response["alias"] = alias
+
+        if "cpl" in item:
+            item["creationProperties"] = item["cpl"]
+        attributes = self.dumpAttributes(obj_id)
+        if attributes:
+            response["attributes"] = attributes
+        links = self.dumpLinks(obj_id)
+        if links:
+            response["links"] = links
+        return response
+
+    def dumpGroups(self):
+        groups = {}
+        item = self.dumpGroup(self._root_uuid)
+        root_uuid = getUuidFromId(self._root_uuid)
+        groups[root_uuid] = item
+        obj_ids = self.db.getCollection("groups")
+        for obj_id in obj_ids:
+            if obj_id == self._root_uuid:
+                continue
+            item = self.dumpGroup(obj_id)
+            obj_uuid = getUuidFromId(obj_id)
+            groups[obj_uuid] = item
+
+        self.json["groups"] = groups
+
+    def dumpDataset(self, obj_id):
+        response = {}
+        self.log.info("dumpDataset: " + obj_id)
+        item = self.db.getObjectById(obj_id)
+        alias = self.getAliasList(obj_id)
+        response["alias"] = alias
+
+        response["type"] = item["type"]
+        shapeItem = item["shape"]
+        shape_rsp = {}
+        num_elements = 1
+        shape_rsp["class"] = shapeItem["class"]
+        if shapeItem["class"] == "H5S_NULL":
+            dims = None
+            num_elements = 0
+        elif shapeItem["class"] == "H5S_SCALAR":
+            dims = ()
+            num_elements = 1
+        else:
+            shape_rsp["dims"] = shapeItem["dims"]
+            dims = tuple(shapeItem["dims"])
+            for extent in dims:
+                num_elements *= extent
+
+        if "maxdims" in shapeItem:
+            maxdims = []
+            for dim in shapeItem["maxdims"]:
+                if dim == 0:
+                    maxdims.append("H5S_UNLIMITED")
+                else:
+                    maxdims.append(dim)
+            shape_rsp["maxdims"] = maxdims
+        response["shape"] = shape_rsp
+
+        if "cpl" in item:
+            response["creationProperties"] = item["cpl"]
+
+        attributes = self.dumpAttributes(obj_id)
+        if attributes:
+            response["attributes"] = attributes
+
+        if not self.no_data:
+            if num_elements > 0:
+                sel_all = selections.select(dims, ...)
+                arr = self.db.getDatasetValues(obj_id, sel_all)
+                response["value"] = bytesArrayToList(arr)  # dump values unless header flag was passed
+        return response
+
+    def dumpDatasets(self):
+        obj_ids = self.db.getCollection("datasets")
+        if obj_ids:
+            datasets = {}
+            for obj_id in obj_ids:
+                item = self.dumpDataset(obj_id)
+                obj_uuid = getUuidFromId(obj_id)
+                datasets[obj_uuid] = item
+
+            self.json["datasets"] = datasets
+
+    def dumpDatatype(self, obj_id):
+        response = {}
+        item = self.db.getObjectById(obj_id)
+        alias = self.getAliasList(obj_id)
+        response["alias"] = alias
+        response["type"] = item["type"]
+        if "cpl" in item:
+            response["creationProperties"] = item["cpl"]
+        attributes = self.dumpAttributes(obj_id)
+        if attributes:
+            response["attributes"] = attributes
+        return response
+
+    def dumpDatatypes(self):
+        obj_ids = self.db.getCollection("datatypes")
+        if obj_ids:
+            datatypes = {}
+            for obj_id in obj_ids:
+                item = self.dumpDatatype(obj_id)
+                obj_uuid = getUuidFromId(obj_id)
+                datatypes[obj_uuid] = item
+
+            self.json["datatypes"] = datatypes
+
+    def dumpFile(self):
+        self._root_uuid = self.db.getObjectIdByPath("/")
+
+        db_version_info = self.db.getVersionInfo()
+
+        self.json["apiVersion"] = db_version_info["hdf5-json-version"]
+        self.json["root"] = getUuidFromId(self._root_uuid)
+
+        self.updateAliasList()  # create alias_db with obj_id to alias list dict
+
+        self.dumpGroups()
+
+        self.dumpDatasets()
+
+        self.dumpDatatypes()
+        indent = 4
+        ensure_ascii = True
+        if self._filepath:
+            with open('data.json', 'w', encoding='utf-8') as f:
+                json.dump(self.json, f, ensure_ascii=ensure_ascii, indent=indent)
+        else:
+            print(json.dumps(self.json, sort_keys=True, ensure_ascii=ensure_ascii, indent=indent))
diff --git a/src/h5json/jsontoh5/jsontoh5.py b/src/h5json/jsontoh5/jsontoh5.py
index c12d037..28f5e00 100755
--- a/src/h5json/jsontoh5/jsontoh5.py
+++ b/src/h5json/jsontoh5/jsontoh5.py
@@ -9,274 +9,49 @@
 # distribution tree.  If you do not have access to this file, you may        #
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
-import json
-import argparse
-import h5py
+import sys
+import os.path as op
 import logging
-import logging.handlers
 
 from h5json import Hdf5db
-
-
-"""
-Writeh5 - return json representation of all objects within the given file
-    h5writer = Writeh5(db, h5json)
-        h5writer.writeFile()
-"""
-
-
-class Writeh5:
-    def __init__(self, db, json, options=None):
-        self.options = options
-        self.db = db
-        self.json = json
-        self.root_uuid = None
-
-    #
-    # Create a hard, soft, or external link
-    #
-    def createLink(self, link_obj, parent_uuid):
-        title = link_obj["title"]
-        link_class = link_obj["class"]
-        if link_class == "H5L_TYPE_HARD":
-            child_uuid = link_obj["id"]
-            self.db.linkObject(parent_uuid, child_uuid, title)
-        elif link_class == "H5L_TYPE_SOFT":
-            h5path = link_obj["h5path"]
-            self.db.createSoftLink(parent_uuid, h5path, title)
-        elif link_class == "H5L_TYPE_EXTERNAL":
-            h5path = link_obj["h5path"]
-            link_file = link_obj["file"]
-            self.db.createExternalLink(parent_uuid, link_file, h5path, title)
-        else:
-            print("Unable to create link with class:", link_class)
-
-    #
-    # Create HDF5 dataset object and write data values
-    #
-    def createDataset(self, uuid, body):
-        datatype = body["type"]
-        if isinstance(datatype, str) and datatype.startswith("datatypes/"):
-            # committed datatype, just pass in the UUID part
-            datatype = datatype[len("datatypes/") :]
-        dims = ()  # if no space in body, default to scalar
-        max_shape = None
-        creation_props = None
-        if "creationProperties" in body:
-            creation_props = body["creationProperties"]
-        if "shape" in body:
-            shape = body["shape"]
-            if shape["class"] == "H5S_SIMPLE":
-                dims = shape["dims"]
-                if isinstance(dims, int):
-                    # convert int to array
-                    dim1 = shape
-                    dims = [dim1]
-                if "maxdims" in shape:
-                    max_shape = shape["maxdims"]
-                    if isinstance(max_shape, int):
-                        # convert to array
-                        dim1 = max_shape
-                        max_shape = [dim1]
-                    # convert H5S_UNLIMITED's to None's
-                    for i in range(len(max_shape)):
-                        if max_shape[i] == "H5S_UNLIMITED":
-                            max_shape[i] = None
-            elif shape["class"] == "H5S_NULL":
-                dims = None
-
-        self.db.createDataset(
-            datatype,
-            dims,
-            max_shape=max_shape,
-            creation_props=creation_props,
-            obj_uuid=uuid,
-        )
-
-        if "value" in body:
-            data = body["value"]
-            if data:
-                data = self.db.toRef(len(dims), datatype, data)
-                self.db.setDatasetValuesByUuid(uuid, data)
-
-    def createAttribute(self, attr_json, col_name, uuid):
-        attr_name = attr_json["name"]
-        datatype = attr_json["type"]
-        if isinstance(datatype, str) and datatype.startswith("datatypes/"):
-            # committed datatype, just pass in the UUID part
-            datatype = datatype[len("datatypes/") :]
-
-        attr_value = None
-        if "value" in attr_json:
-            attr_value = attr_json["value"]
-        dims = None
-        if "shape" in attr_json:
-            shape = attr_json["shape"]
-            if shape["class"] == "H5S_SIMPLE":
-                dims = shape["dims"]
-                if isinstance(dims, int):
-                    # convert int to array
-                    dim1 = shape
-                    dims = [dim1]
-            elif shape["class"] == "H5S_SCALAR":
-                dims = ()  # empty tuple for scalar
-        self.db.createAttribute(col_name, uuid, attr_name, dims, datatype, attr_value)
-
-    #
-    # create committed datatype HDF5 object
-    #
-    def createDatatype(self, uuid, body):
-        datatype = body["type"]
-        self.db.createCommittedType(datatype, obj_uuid=uuid)
-
-    #
-    # Create HDF5 group object  (links and attributes will be added later)
-    #
-    def createGroup(self, uuid, body):
-        if uuid != self.root_uuid:
-            self.db.createGroup(obj_uuid=uuid)
-
-    #
-    # Create all the HDF5 objects defined in the JSON file
-    #
-    def createObjects(self):
-        # create datatypes
-        if "datatypes" in self.json:
-            datatypes = self.json["datatypes"]
-            for uuid in datatypes:
-                json_obj = datatypes[uuid]
-                self.createDatatype(uuid, json_obj)
-        # create groups
-        if "groups" in self.json:
-            groups = self.json["groups"]
-            for uuid in groups:
-                json_obj = groups[uuid]
-                self.createGroup(uuid, json_obj)
-        # create datasets
-        if "datasets" in self.json:
-            datasets = self.json["datasets"]
-            for uuid in datasets:
-                json_obj = datasets[uuid]
-                self.createDataset(uuid, json_obj)
-
-    #
-    # Create all the attributes for HDF5 objects defined in the JSON file
-    # Note: this needs to be done after createObjects since an attribute
-    # may use a committed datatype
-    #
-    def createAttributes(self):
-        dimension_list_attrs = []  # track dimension list attributes
-        # create datatype attributes
-        if "datatypes" in self.json:
-            datatypes = self.json["datatypes"]
-            for uuid in datatypes:
-                body = datatypes[uuid]
-                if "attributes" in body:
-                    attributes = body["attributes"]
-                    for attribute in attributes:
-                        self.createAttribute(attribute, "datatypes", uuid)
-        # create group attributes
-        if "groups" in self.json:
-            groups = self.json["groups"]
-            for uuid in groups:
-                body = groups[uuid]
-                if "attributes" in body:
-                    attributes = body["attributes"]
-                    for attribute in attributes:
-                        self.createAttribute(attribute, "groups", uuid)
-        # create datasets
-        if "datasets" in self.json:
-            datasets = self.json["datasets"]
-            for uuid in datasets:
-                body = datasets[uuid]
-                if "attributes" in body:
-                    attributes = body["attributes"]
-                    for attribute in attributes:
-                        if attribute["name"] == "DIMENSION_LIST":
-                            # defer dimension list creation until after we've created all other
-                            # attributes (otherwsie attach_scale may fail)
-                            dimension_list_attrs.append(
-                                {"attribute": attribute, "uuid": uuid}
-                            )
-                        else:
-                            self.createAttribute(attribute, "datasets", uuid)
-
-        # finally, do dimension_list attributes
-        for item in dimension_list_attrs:
-            attribute = item["attribute"]
-            uuid = item["uuid"]
-            self.createAttribute(attribute, "datasets", uuid)
-
-    #
-    # Link all the objects
-    # Note: this will "de-anonymous-ize" objects defined in the HDF5 file
-    #   Any non-linked objects will be deleted when the __db__ group is deleted
-    #
-    def createLinks(self):
-        if "groups" in self.json:
-            groups = self.json["groups"]
-            for uuid in groups:
-                json_obj = groups[uuid]
-                if "links" in json_obj:
-                    links = json_obj["links"]
-                    for link in links:
-                        self.createLink(link, uuid)
-
-    def writeFile(self):
-
-        self.root_uuid = self.json["root"]
-
-        self.createObjects()  # create datasets, groups, committed datatypes
-        self.createAttributes()  # create attributes for objects
-        self.createLinks()  # link it all together
+from h5json.h5pystore.h5py_writer import H5pyWriter
+from h5json.jsonstore.h5json_reader import H5JsonReader
 
 
 def main():
-    parser = argparse.ArgumentParser(usage="%(prog)s [-h] <json_file> <h5_file>")
-    parser.add_argument(
-        "in_filename", nargs="+", help="JSon file to be converted to h5"
-    )
-    parser.add_argument("out_filename", nargs="+", help="name of HDF5 output file")
-    args = parser.parse_args()
+    if len(sys.argv) < 3 or sys.argv[1] in ("-h", "--help"):
+        print(f"usage: {sys.argv[0]} [-h] [--nodata] <json_file> <h5_file>")
+        sys.exit(0)
+
+    no_data = False
+    json_filename = None
+    hdf5_filename = None
+    for i in range(1, len(sys.argv)):
+        if sys.argv[i] == "--nodata":
+            no_data = True
+        elif not json_filename:
+            json_filename = sys.argv[i]
+        else:
+            hdf5_filename = sys.argv[i]
 
     # create logger
-    log = logging.getLogger("h5serv")
-    # log.setLevel(logging.WARN)
-    log.setLevel(logging.INFO)
-    # add log handler
-    handler = logging.FileHandler("./jsontoh5.log")
-
-    # add handler to logger
-    log.addHandler(handler)
-
-    text = open(args.in_filename[0]).read()
-
-    # parse the json file
-    h5json = json.loads(text)
-
-    if "root" not in h5json:
-        raise Exception("no root key in input file")
-    root_uuid = h5json["root"]
-
-    filename = args.out_filename[0]
-
-    # create the file, will raise IOError if there's a problem
-    Hdf5db.createHDF5File(filename)
-
-    with Hdf5db(
-        filename, root_uuid=root_uuid, update_timestamps=False, app_logger=log
-    ) as db:
-        h5writer = Writeh5(db, h5json)
-        h5writer.writeFile()
-
-    # open with h5py and remove the _db_ group
-    # Note: this will delete any anonymous (un-linked) objects
-    f = h5py.File(filename, "a")
-    if "__db__" in f:
-        del f["__db__"]
-    f.close()
-
-    print("done!")
+    logfname = "jsontoh5.log"
+    loglevel = logging.DEBUG
+    logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
+    log = logging.getLogger()
+
+    # check that the input file exists
+    if not op.isfile(json_filename):
+        sys.exit(f"Cannot find file: {json_filename}")
+
+    log.info(f"jsontoh5 {json_filename} to {hdf5_filename}")
+
+    db = Hdf5db(app_logger=log)
+    db.reader = H5JsonReader(json_filename, app_logger=log)
+    db.writer = H5pyWriter(hdf5_filename, no_data=no_data, app_logger=log)
+    db.open()  # read json data
+    # close should create everything the json reader read to the output file
+    db.close()
 
 
 if __name__ == "__main__":
diff --git a/src/h5json/objid.py b/src/h5json/objid.py
new file mode 100644
index 0000000..57b5316
--- /dev/null
+++ b/src/h5json/objid.py
@@ -0,0 +1,508 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HDF (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+#
+# objID:
+# id (uuid) related functions
+#
+
+
+import hashlib
+import uuid
+
+S3_URI = "s3://"
+FILE_URI = "file://"
+AZURE_URI = "blob.core.windows.net/"  # preceded with "https://"
+UUID_LEN = 36  # length for uuid strings
+
+
+def _getStorageProtocol(uri):
+    """ returns 's3://', 'file://', or 'https://...net/' prefix if present.
+    If the prefix is in the form: https://myaccount.blob.core.windows.net/mycontainer
+    (references Azure blob storage), return: https://myaccount.blob.core.windows.net/
+    otherwise None """
+
+    if not uri:
+        protocol = None
+    elif uri.startswith(S3_URI):
+        protocol = S3_URI
+    elif uri.startswith(FILE_URI):
+        protocol = FILE_URI
+    elif uri.startswith("https://") and uri.find(AZURE_URI) > 0:
+        n = uri.find(AZURE_URI) + len(AZURE_URI)
+        protocol = uri[:n]
+    elif uri.find("://") >= 0:
+        raise ValueError(f"storage uri: {uri} not supported")
+    else:
+        protocol = None
+    return protocol
+
+
+def _getBaseName(uri):
+    """ Return the part of the URI after the storage protocol (if any) """
+
+    protocol = _getStorageProtocol(uri)
+    if not protocol:
+        return uri
+    else:
+        return uri[len(protocol):]
+
+
+def _getPrefixForCollection(collection):
+    """ Return prefix character for given collection type """
+    collection = collection.lower()
+
+    if collection in ("group", "groups"):
+        return 'g'
+    elif collection in ("dataset", "datasets"):
+        return 'd'
+    elif collection in ("datatype", "datatypes"):
+        return 't'
+    elif collection in ("chunk", "chunks"):
+        return 'c'
+    else:
+        raise ValueError(f"unexpected collection type: {collection}")
+
+
+def getIdHash(id):
+    """Return md5 prefix based on id value"""
+    m = hashlib.new("md5")
+    m.update(id.encode("utf8"))
+    hexdigest = m.hexdigest()
+    return hexdigest[:5]
+
+
+def isSchema2Id(id):
+    """return true if this is a v2 id"""
+    # v1 ids are in the standard UUID format: 8-4-4-4-12
+    # v2 ids are in the non-standard: 8-8-4-6-6
+    if not isValidUuid(id):
+        return False
+    parts = id.split("-")
+    if len(parts) != 6:
+        raise ValueError(f"Unexpected id formation for uuid: {id}")
+    if len(parts[2]) == 8:
+        return True
+    else:
+        return False
+
+
+def getIdHexChars(id):
+    """get the hex chars of the given id"""
+    if id[0] == "c":
+        # don't include chunk index
+        index = id.index("_")
+        parts = id[0:index].split("-")
+    else:
+        parts = id.split("-")
+    if len(parts) != 6:
+        raise ValueError(f"Unexpected id format for uuid: {id}")
+    return "".join(parts[1:])
+
+
+def hexRot(ch):
+    """rotate hex character by 8"""
+    return format((int(ch, base=16) + 8) % 16, "x")
+
+
+def getCollectionForId(obj_id):
+    """return groups/datasets/datatypes based on id"""
+    if not isinstance(obj_id, str):
+        raise ValueError("invalid object id")
+
+    collection = None
+    if obj_id.startswith("g-") or obj_id.startswith("groups/"):
+        collection = "groups"
+    elif obj_id.startswith("d-") or obj_id.startswith("datasets/"):
+        collection = "datasets"
+    elif obj_id.startswith("t-") or obj_id.startswith("datatypes"):
+        collection = "datatypes"
+    else:
+        raise ValueError(f"{obj_id} not a collection id")
+    return collection
+
+
+def isRootObjId(id):
+    """returns true if this is a root id (only for v2 schema)"""
+    if not isSchema2Id(id):
+        raise ValueError("isRootObjId can only be used with v2 ids")
+    validateUuid(id)  # will throw ValueError exception if not a objid
+    try:
+        if getCollectionForId(id) != "groups":
+            return False  # not a group
+    except ValueError:
+        return False
+    token = getIdHexChars(id)
+    # root ids will have last 16 chars rotated version of the first 16
+    is_root = True
+    for i in range(16):
+        if token[i] != hexRot(token[i + 16]):
+            is_root = False
+            break
+    return is_root
+
+
+def getRootObjId(id):
+    """returns root id for this objid if this is a root id
+    (only for v2 schema)
+    """
+    if isRootObjId(id):
+        return id  # this is the root id
+    token = list(getIdHexChars(id))
+    # root ids will have last 16 chars rotated version of the first 16
+    for i in range(16):
+        token[i + 16] = hexRot(token[i])
+    token = "".join(token)
+    root_id = "g-" + token[0:8] + "-" + token[8:16] + "-" + token[16:20]
+    root_id += "-" + token[20:26] + "-" + token[26:32]
+
+    return root_id
+
+
+def createObjId(obj_type=None, root_id=None):
+    """ create a new objid
+
+        if obj_type is None, return just a bare uuid.
+        Otherwise a hsds v2 schema obj_id will be created.
+        In this case obj_type should be one of "groups",
+        "datasets", "datatypes", "chunks".  If rootid is
+        None, a root group obj_id will be created.  Otherwise the
+        obj_id will be a an id that has root_id as it's root.  """
+
+    prefix = None
+    if obj_type is None:
+        # just return a regular uuid
+        objid = str(uuid.uuid4())
+    else:
+
+        prefix = _getPrefixForCollection(obj_type)
+        # schema v2
+        salt = uuid.uuid4().hex
+        # take a hash to randomize the uuid
+        token = list(hashlib.sha256(salt.encode()).hexdigest())
+
+        if root_id:
+            # replace first 16 chars of token with first 16 chars of root id
+            root_hex = getIdHexChars(root_id)
+            token[0:16] = root_hex[0:16]
+        else:
+            if obj_type != "groups":
+                raise ValueError("expected 'groups' obj_type for root group id")
+            # use only 16 chars, but make it look a 32 char id
+            for i in range(16):
+                token[16 + i] = hexRot(token[i])
+        # format as a string
+        token = "".join(token)
+        objid = prefix + "-" + token[0:8] + "-" + token[8:16] + "-"
+        objid += token[16:20] + "-" + token[20:26] + "-" + token[26:32]
+
+    return objid
+
+
+def getS3Key(id):
+    """Return s3 key for given id.
+
+    For schema v1:
+        A md5 prefix is added to the front of the returned key to better
+        distribute S3 objects.
+    For schema v2:
+        The id is converted to the pattern: "db/{rootid[0:16]}" for rootids and
+        "db/id[0:16]/{prefix}/id[16-32]" for other ids
+        Chunk ids have the chunk index added after the slash:
+        "db/id[0:16]/d/id[16:32]/x_y_z
+
+    For domain id's:
+        Return a key with the .domain suffix and no preceding slash.
+        For non-default buckets, use the format: <bucket_name>/s3_key
+        If the id has a storage specifier ("s3://", "file://", etc.)
+        include that along with the bucket name. e.g.: "s3://mybucket/a_folder/a_file.h5"
+    """
+
+    base_id = _getBaseName(id)  # strip any s3://, etc.
+    if base_id.find("/") > 0:
+        # a domain id
+        domain_suffix = ".domain.json"
+        index = base_id.find("/") + 1
+        key = base_id[index:]
+        if not key.endswith(domain_suffix):
+            if key[-1] != "/":
+                key += "/"
+            key += domain_suffix
+    else:
+        if isSchema2Id(id):
+            # schema v2 id
+            hexid = getIdHexChars(id)
+            prefix = id[0]  # one of g, d, t, c
+            if prefix not in ("g", "d", "t", "c"):
+                raise ValueError(f"Unexpected id: {id}")
+
+            if isRootObjId(id):
+                key = f"db/{hexid[0:8]}-{hexid[8:16]}"
+            else:
+                partition = ""
+                if prefix == "c":
+                    # use 'g' so that chunks will show up under their dataset
+                    s3col = "d"
+                    n = id.find("-")
+                    if n > 1:
+                        # extract the partition index if present
+                        partition = "p" + id[1:n]
+                else:
+                    s3col = prefix
+                key = f"db/{hexid[0:8]}-{hexid[8:16]}/{s3col}/{hexid[16:20]}"
+                key += f"-{hexid[20:26]}-{hexid[26:32]}"
+            if prefix == "c":
+                if partition:
+                    key += "/"
+                    key += partition
+                # add the chunk coordinate
+                index = id.index("_")  # will raise ValueError if not found
+                n = index + 1
+                coord = id[n:]
+                key += "/"
+                key += coord
+            elif prefix == "g":
+                # add key suffix for group
+                key += "/.group.json"
+            elif prefix == "d":
+                # add key suffix for dataset
+                key += "/.dataset.json"
+            else:
+                # add key suffix for datatype
+                key += "/.datatype.json"
+        else:
+            # v1 id
+            # schema v1 id
+            idhash = getIdHash(id)
+            key = f"{idhash}-{id}"
+
+    return key
+
+
+def getObjId(s3key):
+    """Return object id given valid s3key"""
+    if all(
+        (
+            len(s3key) >= 44 and s3key[0:5].isalnum(),
+            len(s3key) >= 44 and s3key[5] == "-",
+            len(s3key) >= 44 and s3key[6] in ("g", "d", "c", "t"),
+        )
+    ):
+        # v1 obj keys
+        objid = s3key[6:]
+    elif s3key.endswith("/.domain.json"):
+        objid = "/" + s3key[: -(len("/.domain.json"))]
+    elif s3key.startswith("db/"):
+        # schema v2 object key
+        parts = s3key.split("/")
+        chunk_coord = ""  # used only for chunk ids
+        partition = ""  # likewise
+        token = []
+        for ch in parts[1]:
+            if ch != "-":
+                token.append(ch)
+
+        if len(parts) == 3:
+            # root id
+            # last part should be ".group.json"
+            if parts[2] != ".group.json":
+                raise ValueError(f"unexpected S3Key: {s3key}")
+            # add 16 more chars using rotated version of first 16
+            for i in range(16):
+                token.append(hexRot(token[i]))
+            prefix = "g"
+        elif len(parts) == 5:
+            # group, dataset, or datatype or chunk
+            for ch in parts[3]:
+                if ch != "-":
+                    token.append(ch)
+
+            if parts[2] == "g" and parts[4] == ".group.json":
+                prefix = "g"  # group json
+            elif parts[2] == "t" and parts[4] == ".datatype.json":
+                prefix = "t"  # datatype json
+            elif parts[2] == "d":
+                if parts[4] == ".dataset.json":
+                    prefix = "d"  # dataset json
+                else:
+                    # chunk object
+                    prefix = "c"
+                    chunk_coord = "_" + parts[4]
+            else:
+                raise ValueError(f"unexpected S3Key: {s3key}")
+        elif len(parts) == 6:
+            # chunk key with partitioning
+            for ch in parts[3]:
+                if ch != "-":
+                    token.append(ch)
+            if parts[2][0] != "d":
+                raise ValueError(f"unexpected S3Key: {s3key}")
+            prefix = "c"
+            partition = parts[4]
+            if partition[0] != "p":
+                raise ValueError(f"unexpected S3Key: {s3key}")
+            partition = partition[1:]  # strip off the p
+            chunk_coord = "_" + parts[5]
+        else:
+            raise ValueError(f"unexpected S3Key: {s3key}")
+
+        token = "".join(token)
+        objid = prefix + partition + "-" + token[0:8] + "-" + token[8:16]
+        objid += "-" + token[16:20] + "-" + token[20:26] + "-"
+        objid += token[26:32] + chunk_coord
+    else:
+        msg = f"unexpected S3Key: {s3key}"
+        raise ValueError(msg)
+    return objid
+
+
+def isS3ObjKey(s3key):
+    """ return True if this is a storage key """
+    valid = False
+    try:
+        objid = getObjId(s3key)
+        if objid:
+            valid = True
+    except KeyError:
+        pass  # ignore
+    except ValueError:
+        pass  # ignore
+    return valid
+
+
+def validateUuid(id, obj_class=None):
+    """ verify the UUID is well-formed
+        schema can be:
+           None: expecting ordinary UUID
+           "v1": expecting HSDS v1 format
+           "v2": expecting HSDS v2 format
+        if set obj_class can be one of "groups", "datasets", "datatypes"
+    """
+    if not isinstance(id, str):
+        raise ValueError("Expected string type")
+    if len(id) < UUID_LEN:
+        raise ValueError("id is too short to be an object identifier")
+    if len(id) == UUID_LEN:
+        if obj_class:
+            # expected a prefix
+            raise ValueError(f"obj_id: {id} not valid for collection: {obj_class}")
+    else:
+        # does this have a v1 schema hash tag?
+        # e.g.: "a49be-g-314d61b8-9954-11e6-a733-3c15c2da029e",
+        if id[:5].isalnum() and id[5] == '-':
+            id = id[6:]  # trim off the hash tag
+
+        # for id's like "datasets/abced...", trim the collection name and add collection
+        # prefix to the id if not already present
+        if id.find('/') > 0:
+            parts = id.split('/')
+            if len(parts) > 2:
+                raise ValueError(f"obj_id: {id} not valid (too many slash chars)")
+            collection = parts[0]
+            if getCollectionForId(id) != collection:
+                raise ValueError(f"obj_id: {id} invalid collection")
+            id = parts[1]
+            if len(id) == UUID_LEN:
+                # prefix with the one char collection code
+                id = _getPrefixForCollection(collection) + '-' + id
+
+        # validate prefix
+        if id[0] not in ("g", "d", "t", "c"):
+            raise ValueError("Unexpected prefix")
+        if id[0] != "c" and id[1] != "-":
+            # chunk ids may have a partition index following the c
+            raise ValueError("Unexpected prefix")
+        if obj_class is not None:
+            obj_class = obj_class.lower()
+            if id[0] != _getPrefixForCollection(obj_class):
+                raise ValueError(f"unexpected object id {id} for collection: {obj_class}")
+        if id[0] == "c":
+            # trim the type char and any partition id
+            n = id.find("-")
+            if n == -1:
+                raise ValueError("Invalid chunk id")
+
+            # trim the chunk index for chunk ids
+            m = id.find("_")
+            if m == -1:
+                raise ValueError("Invalid chunk id")
+            n += 1
+            id = "c-" + id[n:m]
+        id = id[2:]
+    if len(id) != UUID_LEN:
+        # id should be 36 now
+        raise ValueError("Unexpected id length")
+
+    for ch in id:
+        if ch.isalnum():
+            continue
+        if ch == "-":
+            continue
+        raise ValueError(f"Unexpected character in uuid: {ch}")
+
+
+def isValidUuid(id, obj_class=None):
+    try:
+        validateUuid(id, obj_class)
+        return True
+    except ValueError:
+        return False
+
+
+def isValidChunkId(id):
+    if not isValidUuid(id):
+        return False
+    if id[0] != "c":
+        return False
+    return True
+
+
+def getClassForObjId(id):
+    """return domains/chunks/groups/datasets/datatypes based on id"""
+    if not isinstance(id, str):
+        raise ValueError("Expected string type")
+    if len(id) == 0:
+        raise ValueError("Empty string")
+    if id[0] == "/":
+        return "domains"
+    if isValidChunkId(id):
+        return "chunks"
+    else:
+        return getCollectionForId(id)
+
+
+def isObjId(id):
+    """return true if uuid or domain"""
+    if not isinstance(id, str) or len(id) == 0:
+        return False
+    if id.find("/") > 0:
+        # domain id is any string in the form <bucket_name>/<domain_path>
+        return True
+    return isValidUuid(id)
+
+
+def getUuidFromId(id):
+    """strip off the type prefix ('g-' or 'd-', or 't-')
+    and return the uuid part """
+    if id.find('/') > 0:
+        # remove a collection name prefix if present
+        parts = id.split('/')
+        if len(parts) > 2:
+            raise ValueError(f"Unexpected obj_id: {id}")
+        id = parts[1]
+    if len(id) == UUID_LEN:
+        # just a uuid
+        return id
+    elif len(id) == UUID_LEN + 2:
+        # 'g-', 'd-', or 't-' prefix
+        return id[2:]
+    else:
+        raise ValueError(f"Unexpected obj_id: {id}")
diff --git a/src/h5json/openid.py b/src/h5json/openid.py
new file mode 100644
index 0000000..af38d94
--- /dev/null
+++ b/src/h5json/openid.py
@@ -0,0 +1,437 @@
+import os
+import sys
+import json
+import requests
+import time
+from abc import ABC, abstractmethod
+from datetime import datetime
+
+from . import config as hsconfig
+
+
+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
+
+# Azure
+try:
+    import adal
+except ModuleNotFoundError:
+    pass  # change this to the eprint below to see the import error
+    # eprint()"Unable to import azure auth packages")
+
+# Google
+try:
+    from google_auth_oauthlib.flow import InstalledAppFlow as GoogleInstalledAppFlow
+    from google.auth.transport.requests import Request as GoogleRequest
+    from google.oauth2.credentials import Credentials as GoogleCredentials
+    from google.oauth2 import id_token as GoogleIDToken
+except ModuleNotFoundError:
+    pass  # change this to the eprint below to see the import error
+    # eprint("Unable to import google auth packages")
+
+
+class OpenIDHandler(ABC):
+
+    def __init__(self, endpoint, use_token_cache=True, username=None, password=None):
+        """Initialize the token."""
+
+        # Location of the token cache.
+        self._token_cache_file = os.path.expanduser('~/.hstokencfg')
+        self._endpoint = endpoint
+        self._username = username
+        self._password = password
+
+        # The _token attribute should be a dict with at least the following keys:
+        #
+        # accessToken - The OpenID token to send.
+        # refreshToken - The refresh token (optional).
+        # expiresOn - The unix timestamp when the token expires (optional).
+
+        if not use_token_cache or not os.path.isfile(self._token_cache_file):
+            self._token = None
+        else:
+            if username:
+                file_key = username + '@' + endpoint
+            else:
+                file_key = endpoint
+            with open(self._token_cache_file, 'r') as token_file:
+                self._token = json.load(token_file).get(file_key, None)
+
+    @abstractmethod
+    def acquire(self):
+        """Acquire a new token from the provider."""
+        pass
+
+    @abstractmethod
+    def refresh(self):
+        """Refresh an existing token with the provider."""
+        pass
+
+    @property
+    def username(self):
+        """ Return username if known """
+        return self._username
+
+    @property
+    def expired(self):
+        """Return if the token is expired."""
+        t = self._token
+        # add some buffer to account for clock skew
+        return t is not None and 'expiresOn' in t and time.time() + 10.0 >= t['expiresOn']
+
+    @property
+    def token(self):
+        """Return the token if valid, otherwise get a new one."""
+
+        if self.expired:
+            self.refresh()
+            if self._token:
+                self.write_token_cache()
+
+        if self._token is None:
+            self.acquire()
+            self.write_token_cache()
+
+        return self._token['accessToken']
+
+    def write_token_cache(self):
+        """Write the token to a file cache."""
+
+        cache_exists = os.path.isfile(self._token_cache_file)
+
+        if self._username:
+            file_key = self._username + '@' + self._endpoint
+        else:
+            file_key = self._endpoint
+
+        # Create a new cache file.
+        if not cache_exists and self._token is not None:
+            with open(self._token_cache_file, 'w') as token_file:
+                json.dump({file_key: self._token}, token_file)
+
+        # Update an exisiting cache file.
+        elif cache_exists:
+            with open(self._token_cache_file, 'r+') as token_file:
+                cache = json.loads(token_file.read())
+
+                # Store valid tokens.
+                if self._token is not None:
+                    cache[file_key] = self._token
+
+                # Delete invalid tokens.
+                elif file_key in cache:
+                    del cache[file_key]
+
+                token_file.seek(0)
+                token_file.truncate(0)
+                json.dump(cache, token_file)
+
+
+class AzureOpenID(OpenIDHandler):
+
+    AUTHORITY_URI = 'https://login.microsoftonline.com'  # login endpoint for AD auth
+
+    def __init__(self, endpoint, config=None):
+        """Store configuration."""
+
+        # Configuration manager
+        hs_config = hsconfig.get_config()
+
+        # Config is a dictionary.
+        if isinstance(config, dict):
+            self.config = config
+
+        # Maybe client_secrets are in environment variables?
+        else:
+
+            self.config = {
+                'AD_APP_ID': hs_config.get("hs_ad_app_id", None),
+                'AD_TENANT_ID': hs_config.get("hs_ad_tenant_id", None),
+                'AD_RESOURCE_ID': hs_config.get("hs_ad_resource_id", None),
+                'AD_CLIENT_SECRET': hs_config.get("hs_ad_client_secret", None)
+            }
+
+        if 'AD_CLIENT_SECRET' in self.config and self.config['AD_CLIENT_SECRET']:
+            use_token_cache = False
+        else:
+            use_token_cache = True
+
+        super().__init__(endpoint, use_token_cache=use_token_cache)
+
+    def write_token_cache(self):
+        if 'AD_CLIENT_SECRET' in self.config and self.config['AD_CLIENT_SECRET']:
+            pass  # don't use token cache for unattended authentication
+        else:
+            super().write_token_cache()
+
+    def acquire(self):
+        """Acquire a new Azure token."""
+
+        if "adal" not in sys.modules:
+            msg = "adal module not found, run: pip install -e . '.[azure]'"
+            raise ModuleNotFoundError(msg)
+
+        app_id = self.config["AD_APP_ID"]
+        resource_id = self.config["AD_RESOURCE_ID"]
+        tenant_id = self.config["AD_TENANT_ID"]
+        client_secret = self.config.get("AD_CLIENT_SECRET", None)
+        authority_uri = self.AUTHORITY_URI + '/' + tenant_id
+
+        # Try to get a token using different oauth flows.
+        context = adal.AuthenticationContext(authority_uri, enable_pii=True, api_version=None)
+
+        try:
+            if client_secret is not None:
+                code = context.acquire_token_with_client_credentials(resource_id, app_id, client_secret)
+            else:
+                code = context.acquire_user_code(resource_id, app_id)
+
+        except Exception as e:
+            eprint(f"unable to process AD token: {e}")
+            self._token = None
+            self.write_token_cache()
+            raise
+
+        if "message" in code:
+            eprint(code["message"])
+            mgmt_token = context.acquire_token_with_device_code(resource_id, code, app_id)
+
+        elif "accessToken" in code:
+            mgmt_token = code
+
+        else:
+            eprint("Could not authenticate with AD")
+
+        # Only store some fields.
+        self._token = {
+            'accessToken': mgmt_token['accessToken'],
+            'refreshToken': mgmt_token.get('refreshToken', None),
+            'tenantId': mgmt_token.get('tenantId', tenant_id),
+            'clientId': mgmt_token.get('_clientId', app_id),
+            'resource': mgmt_token.get('resource', resource_id)
+        }
+
+        # Parse time to timestamp.
+        if 'expiresOn' in mgmt_token:
+            expire_dt = datetime.strptime(mgmt_token['expiresOn'], '%Y-%m-%d %H:%M:%S.%f')
+            self._token['expiresOn'] = expire_dt.timestamp()
+
+    def refresh(self):
+        """Try to renew an Azure token."""
+
+        try:
+
+            # This will work for device code flow, but not with client
+            # credentials. If we have the secret, we can just request a new
+            # token anyways.
+
+            authority_uri = self.AUTHORITY_URI + '/' + self._token['tenantId']
+            context = adal.AuthenticationContext(authority_uri, api_version=None)
+            mgmt_token = context.acquire_token_with_refresh_token(self._token['refreshToken'],
+                                                                  self._token['clientId'],
+                                                                  self._token['resource'],
+                                                                  None)
+
+            # New token does not have all the metadata.
+            self._token['accessToken'] = mgmt_token['accessToken']
+            self._token['refreshToken'] = mgmt_token['refreshToken']
+
+            # Parse time to timestamp.
+            if 'expiresOn' in mgmt_token:
+                expire_dt = datetime.strptime(mgmt_token['expiresOn'], '%Y-%m-%d %H:%M:%S.%f')
+                self._token['expiresOn'] = expire_dt.timestamp()
+
+        except Exception:
+            self._token = None
+
+
+class GoogleOpenID(OpenIDHandler):
+
+    def __init__(self, endpoint, config=None, scopes=None):
+        """Store configuration."""
+
+        if "google.oauth2" not in sys.modules:
+            msg = "google.oauth2 module not found, run: pip install -e . '.[google]'"
+            raise ModuleNotFoundError(msg)
+
+        # Configuration manager
+        hs_config = hsconfig.get_config()
+
+        if scopes is None:
+            scopes = hs_config.get('hs_google_scopes', 'openid').split()
+        self.scopes = scopes
+
+        # Config is a client_secrets dictionary.
+        if isinstance(config, dict):
+            self.config = config
+
+        # Config points to a client_secrets.json file.
+        elif isinstance(config, str) and os.path.isfile(config):
+            with open(config, 'r') as f:
+                self.config = json.loads(f.read())
+
+        # Maybe client_secrets are in environment variables?
+        else:
+            self.config = {
+                'installed': {
+                    'project_id': hs_config.get('hs_google_project_id', None),
+                    'client_id': hs_config.get('hs_google_client_id', None),
+                    'client_secret': hs_config.get('hs_google_client_secret', None),
+                    'auth_uri': 'https://accounts.google.com/o/oauth2/auth',
+                    'token_uri': 'https://oauth2.googleapis.com/token',
+                    'auth_provider_x509_cert_url': 'https://www.googleapis.com/oauth2/v1/certs',
+                    'redirect_uris': ['urn:ietf:wg:oauth:2.0:oob', 'http://localhost']
+                }
+            }
+
+        super().__init__(endpoint)
+
+    def _parse(self, creds):
+        """Parse credentials."""
+
+        # NOTE: In Google OpenID, if a client is set up for InstalledAppFlow
+        # then the client_secret is not actually treated as a secret. Acquire
+        # will ALWAYS prompt for user input before granting a token.
+
+        token = {
+            'accessToken': creds.id_token,
+            'refreshToken': creds.refresh_token,
+            'tokenUri': creds.token_uri,
+            'clientId': creds.client_id,
+            'clientSecret': creds.client_secret,
+            'scopes': creds.scopes
+        }
+
+        # The expiry field that is in creds is for the OAuth token, not the
+        # OpenID token. We need to validate the OpenID tokenn to get the exp.
+        idinfo = GoogleIDToken.verify_oauth2_token(creds.id_token, GoogleRequest())
+        if 'exp' in idinfo:
+            token['expiresOn'] = idinfo['exp']
+
+        return token
+
+    def acquire(self):
+        """Acquire a new Google token."""
+
+        flow = GoogleInstalledAppFlow.from_client_config(self.config,
+                                                         scopes=self.scopes)
+        creds = flow.run_console()
+        self._token = self._parse(creds)
+
+    def refresh(self):
+        """Try to renew a token."""
+
+        try:
+
+            token = self._token
+            creds = GoogleCredentials(token=None,
+                                      refresh_token=token['refreshToken'],
+                                      scopes=token['scopes'],
+                                      token_uri=token['tokenUri'],
+                                      client_id=token['clientId'],
+                                      client_secret=token['clientSecret'])
+
+            creds.refresh(GoogleRequest())
+            self._token = self._parse(creds)
+
+        except Exception:
+            self._token = None
+
+
+class KeycloakOpenID(OpenIDHandler):
+
+    def __init__(self, endpoint, config=None, scopes=None, username=None, password=None):
+        """Store configuration."""
+
+        # Configuration manager
+        hs_config = hsconfig.get_config()
+
+        if scopes is None:
+            scopes = hs_config.get('hs_keycloak_scopes', 'openid').split()
+        self.scopes = scopes
+
+        # Config is a client_secrets dictionary.
+        if isinstance(config, dict):
+            self.config = config
+
+        # Config points to a client_secrets.json file.
+        elif isinstance(config, str) and os.path.isfile(config):
+            with open(config, 'r') as f:
+                self.config = json.loads(f.read())
+
+        # Maybe configs are in environment variables?
+        else:
+            self.config = {
+                'keycloak_client_id': hs_config.get('hs_keycloak_client_id', None),
+                'keycloak_client_secret': hs_config.get('hs_keycloak_client_secret', None),
+                'keycloak_realm': hs_config.get('hs_keycloak_realm', None),
+                'keycloak_uri': hs_config.get('hs_keycloak_uri', None)
+            }
+
+        super().__init__(endpoint, username=username, password=password)
+
+    def _getKeycloakUrl(self):
+        if not self.config['keycloak_uri']:
+            raise KeyError("keycloak_uri not set")
+        if not self.config['keycloak_realm']:
+            raise KeyError("Keycloak realm not set")
+        if not self.config['keycloak_client_id']:
+            raise KeyError("keycloak client_id not set")
+
+        url = self.config['keycloak_uri']
+        url += "/realms/"
+        url += self.config['keycloak_realm']
+        url += "/protocol/openid-connect/token"
+
+        return url
+
+    def _parse(self, creds):
+        """Parse credentials."""
+
+        # validate json returned by keycloak
+        if "token_type" not in creds:
+            raise IOError("Unexpected Keycloak JWT, no token_type")
+        if creds["token_type"].lower() != "bearer":
+            raise IOError("Unexpected Keycloak JWT, expected Bearer token")
+
+        token = {}
+        if "access_token" not in creds:
+            raise IOError("Unexpected Keycloak JWT, no access_token")
+        token["accessToken"] = creds["access_token"]
+        if "refesh_token" in creds:
+            token["refreshToken"] = creds["refresh_token"]
+        if "expires_in" in creds:
+            now = time.time()
+            token['expiresOn'] = now + creds["expires_in"]
+
+        # TBD: client_secret
+        # TBD: scopes
+        # TBD: client_id
+
+        return token
+
+    def acquire(self):
+        """Acquire a new Keycloak token."""
+        keycloak_url = self._getKeycloakUrl()
+
+        headers = {"Content-Type": "application/x-www-form-urlencoded"}
+        body = {}
+        body["username"] = self._username
+        body["password"] = self._password
+        body["grant_type"] = "password"
+        body["client_id"] = self.config.get("keycloak_client_id")
+        rsp = requests.post(keycloak_url, data=body, headers=headers)
+
+        if rsp.status_code not in (200, 201):
+            print(f"POST error: {rsp.status_code}")
+            raise IOError(f"Keycloak response: {rsp.status_code}")
+
+        creds = rsp.json()  # TBD: catch json format errors?
+        self._token = self._parse(creds)
+
+    def refresh(self):
+        """Try to renew a token."""
+        # TBD
+        # unclear if refresh is supported without a client secret
+        self._token = None
diff --git a/src/h5json/selections.py b/src/h5json/selections.py
new file mode 100644
index 0000000..3a94b09
--- /dev/null
+++ b/src/h5json/selections.py
@@ -0,0 +1,847 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+
+# We use __getitem__ side effects, which pylint doesn't like.
+# pylint: disable=pointless-statement
+
+"""
+    High-level access to HDF5 dataspace selections
+"""
+
+from __future__ import absolute_import
+
+import numpy as np
+
+H5S_SEL_POINTS = 0
+H5S_SELECT_SET = 1
+H5S_SELECT_APPEND = 2
+H5S_SELECT_PREPEND = 3
+H5S_SELECT_OR = 4
+H5S_SELECT_NONE = 5
+H5S_SELECT_ALL = 6
+H5S_SELECT_HYPERSLABS = 7
+H5S_SELECT_NOTB = 8
+H5S_SELLECT_FANCY = 9
+
+
+def select(obj, args):
+    """ High-level routine to generate a selection from arbitrary arguments
+    to __getitem__.  The arguments should be the following:
+
+    obj
+        Datatset object
+
+    args
+        Either a single argument or a tuple of arguments.  See below for
+        supported classes of argument.
+
+    Argument classes:
+
+    Single Selection instance
+        Returns the argument.
+
+    numpy.ndarray
+        Must be a boolean mask.  Returns a PointSelection instance.
+
+    RegionReference
+        Returns a Selection instance.
+
+    Indices, slices, ellipses only
+        Returns a SimpleSelection instance
+
+    Indices, slices, ellipses, lists or boolean index arrays
+        Returns a FancySelection instance.
+    """
+    if not isinstance(args, tuple):
+        args = (args,)
+
+    if hasattr(obj, "shape") and obj.shape == ():
+        # scalar object
+        sel = ScalarSelection(obj.shape, args)
+        return sel
+
+    # "Special" indexing objects
+    if len(args) == 1:
+
+        arg = args[0]
+
+        if isinstance(arg, Selection):
+            if arg.shape != obj.shape:
+                raise TypeError("Mismatched selection shape")
+            return arg
+
+        elif isinstance(arg, np.ndarray) or isinstance(arg, list):
+            sel = PointSelection(obj.shape)
+            sel[arg]
+            return sel
+        """
+        #todo - RegionReference
+        elif isinstance(arg, h5r.RegionReference):
+            sid = h5r.get_region(arg, dsid)
+            if shape != sid.shape:
+                raise TypeError("Reference shape does not match dataset shape")
+
+            return Selection(shape, spaceid=sid)
+        """
+
+    for a in args:
+        use_fancy = False
+        if isinstance(a, np.ndarray):
+            use_fancy = True
+        elif a is []:
+            use_fancy = True
+        elif not isinstance(a, slice) and a is not Ellipsis:
+            try:
+                int(a)
+            except Exception:
+                use_fancy = True
+        if use_fancy and hasattr(obj, "shape"):
+            sel = FancySelection(obj.shape)
+            sel[args]
+            return sel
+    if hasattr(obj, "shape"):
+        sel = SimpleSelection(obj.shape)
+    else:
+        sel = SimpleSelection(obj)
+    sel[args]
+    return sel
+
+
+def intersect(s1, s2):
+    """ Return the intersection of two selections """
+    # TBD: this is currently only working for simple selections with stride 1
+    valid_select_types = (H5S_SELECT_HYPERSLABS, H5S_SELECT_ALL)
+    if not isinstance(s1, Selection):
+        raise TypeError("Expected selection type for first arg")
+    if not isinstance(s2, Selection):
+        raise TypeError("Expected selection type for second arg")
+    if s1.select_type not in valid_select_types:
+        raise TypeError("Expected hyperslab selection for first arg")
+    if s2.select_type not in valid_select_types:
+        raise TypeError("Expected hyperslab selection for second arg")
+    if s1.shape != s2.shape:
+        raise ValueError("selections have incompatible shapes")
+
+    slices = []
+    rank = len(s1.shape)
+    for dim in range(rank):
+        start = max(s1.start[dim], s2.start[dim])
+        stop = min(s1.start[dim] + s1.count[dim], s2.start[dim] + s2.count[dim])
+        msg = "stepped slices not currently supported"
+        if s1.step[dim] > 1:
+            raise ValueError(msg)
+        if s2.step[dim] > 1:
+            raise ValueError("stepped slices not currently supported")
+        if start > stop:
+            stop = start
+        slices.append(slice(start, stop, 1))
+    slices = tuple(slices)
+
+    return select(s1.shape, slices)
+
+
+class Selection(object):
+
+    """
+        Base class for HDF5 dataspace selections.  Subclasses support the
+        "selection protocol", which means they have at least the following
+        members:
+
+        __init__(shape)   => Create a new selection on "shape"-tuple
+        __getitem__(args) => Perform a selection with the range specified.
+                             What args are allowed depends on the
+                             particular subclass in use.
+
+        id (read-only) =>      h5py.h5s.SpaceID instance
+        shape (read-only) =>   The shape of the dataspace.
+        mshape  (read-only) => The shape of the selection region.
+                               Not guaranteed to fit within "shape", although
+                               the total number of points is less than
+                               product(shape).
+        nselect (read-only) => Number of selected points.  Always equal to
+                               product(mshape).
+
+        broadcast(target_shape) => Return an iterable which yields dataspaces
+                                   for read, based on target_shape.
+
+        The base class represents "unshaped" selections (1-D).
+    """
+
+    def __init__(self, shape, *args, **kwds):
+        """ Create a selection.   """
+
+        shape = tuple(shape)
+        self._shape = shape
+
+        self._select_type = H5S_SELECT_ALL
+
+    @property
+    def select_type(self):
+        """ SpaceID instance """
+        return self._select_type
+
+    @property
+    def shape(self):
+        """ Shape of whole dataspace """
+        return self._shape
+
+    @property
+    def nselect(self):
+        """ Number of elements currently selected """
+
+        return self.getSelectNpoints()
+
+    @property
+    def mshape(self):
+        """ Shape of selection (always 1-D for this class) """
+        return (self.nselect,)
+
+    def getSelectNpoints(self):
+        npoints = None
+        if self._select_type == H5S_SELECT_NONE:
+            npoints = 0
+        elif self._select_type == H5S_SELECT_ALL:
+            dims = self._shape
+            npoints = 1
+            for nextent in dims:
+                npoints *= nextent
+        else:
+            raise IOError("Unsupported select type")
+        return npoints
+
+    def broadcast(self, target_shape):
+        """ Get an iterable for broadcasting """
+        if np.product(target_shape) != self.nselect:
+            raise TypeError("Broadcasting is not supported for point-wise selections")
+        yield self._id
+
+    def __getitem__(self, args):
+        raise NotImplementedError("This class does not support indexing")
+
+    def __repr__(self):
+        return f"Selection(shape:{self._shape})"
+
+
+class PointSelection(Selection):
+
+    """
+        Represents a point-wise selection.  You can supply sequences of
+        points to the three methods append(), prepend() and set(), or a
+        single boolean array to __getitem__.
+    """
+    def __init__(self, shape, *args, **kwds):
+        """ Create a Point selection.   """
+        Selection.__init__(self, shape, *args, **kwds)
+        self._points = []
+
+    @property
+    def points(self):
+        """ selection points """
+        return self._points
+
+    def getSelectNpoints(self):
+        npoints = None
+        if self._select_type == H5S_SELECT_NONE:
+            npoints = 0
+        elif self._select_type == H5S_SELECT_ALL:
+            dims = self._shape
+            npoints = 1
+            for nextent in dims:
+                npoints *= nextent
+        elif self._select_type == H5S_SEL_POINTS:
+            dims = self._shape
+            rank = len(dims)
+            if len(self._points) == rank and not type(self._points[0]) in (list, tuple, np.ndarray):
+                npoints = 1
+            else:
+                npoints = len(self._points)
+        else:
+            raise IOError("Unsupported select type")
+        return npoints
+
+    def _perform_selection(self, points, op):
+        """ Internal method which actually performs the selection """
+        if isinstance(points, np.ndarray) or True:
+            points = np.asarray(points, order='C', dtype='u8')
+            if len(points.shape) == 1:
+                # points.shape = (1,points.shape[0])
+                pass
+
+        if self._select_type != H5S_SEL_POINTS:
+            op = H5S_SELECT_SET
+        self._select_type = H5S_SEL_POINTS
+
+        if op == H5S_SELECT_SET:
+            self._points = points
+        elif op == H5S_SELECT_APPEND:
+            self._points.extent(points)
+        elif op == H5S_SELECT_PREPEND:
+            tmp = self._points
+            self._points = points
+            self._points.extend(tmp)
+        else:
+            raise ValueError("Unsupported operation")
+
+    # def _perform_list_selection(points, H5S_SELECT_SET):
+
+    def __getitem__(self, arg):
+        """ Perform point-wise selection from a NumPy boolean array """
+        if isinstance(arg, list):
+            points = arg
+        else:
+            if not (isinstance(arg, np.ndarray) and arg.dtype.kind == 'b'):
+                raise TypeError("PointSelection __getitem__ only works with bool arrays")
+            if not arg.shape == self._shape:
+                raise TypeError("Boolean indexing array has incompatible shape")
+
+            points = np.transpose(arg.nonzero())
+        self.set(points)
+        return self
+
+    def append(self, points):
+        """ Add the sequence of points to the end of the current selection """
+        self._perform_selection(points, H5S_SELECT_APPEND)
+
+    def prepend(self, points):
+        """ Add the sequence of points to the beginning of the current selection """
+        self._perform_selection(points, H5S_SELECT_PREPEND)
+
+    def set(self, points):
+        """ Replace the current selection with the given sequence of points"""
+        """
+        if isinstance(points, list):
+            # selection with list of points
+            self._perform_list_selection(points, H5S_SELECT_SET)
+
+        else:
+            # selection with boolean ndarray
+        """
+        self._perform_selection(points, H5S_SELECT_SET)
+
+    def __repr__(self):
+        return f"PointSelection(shape:{self._shape}, {len(self._points)} points)"
+
+
+class SimpleSelection(Selection):
+
+    """ A single "rectangular" (regular) selection composed of only slices
+        and integer arguments.  Can participate in broadcasting.
+    """
+
+    @property
+    def mshape(self):
+        """ Shape of current selection """
+        return self._mshape
+
+    @property
+    def start(self):
+        return self._sel[0]
+
+    @property
+    def count(self):
+        return self._sel[1]
+
+    @property
+    def step(self):
+        return self._sel[2]
+
+    def __init__(self, shape, *args, **kwds):
+        Selection.__init__(self, shape, *args, **kwds)
+        rank = len(self._shape)
+        self._sel = ((0,) * rank, self._shape, (1,) * rank, (False,) * rank)
+        self._mshape = self._shape
+        self._select_type = H5S_SELECT_ALL
+
+    def __getitem__(self, args):
+
+        if not isinstance(args, tuple):
+            args = (args,)
+
+        if self._shape == ():
+            if len(args) > 0 and args[0] not in (Ellipsis, ()):
+                raise TypeError("Invalid index for scalar dataset (only ..., () allowed)")
+            self._select_type = H5S_SELECT_ALL
+            return self
+
+        start, count, step, scalar = _handle_simple(self._shape, args)
+        self._sel = (start, count, step, scalar)
+
+        # self._id.select_hyperslab(start, count, step)
+        self._select_type = H5S_SELECT_HYPERSLABS
+
+        self._mshape = tuple(x for x, y in zip(count, scalar) if not y)
+
+        return self
+
+    def getSelectNpoints(self):
+        """Return number of elements in current selection
+        """
+        npoints = None
+        if self._select_type == H5S_SELECT_NONE:
+            npoints = 0
+        elif self._select_type == H5S_SELECT_ALL:
+            dims = self._shape
+            npoints = 1
+            for nextent in dims:
+                npoints *= nextent
+        elif self._select_type == H5S_SELECT_HYPERSLABS:
+            dims = self._shape
+            npoints = 1
+            rank = len(dims)
+            for i in range(rank):
+                npoints *= self.count[i]
+        else:
+            raise IOError("Unsupported select type")
+        return npoints
+
+    def getQueryParam(self):
+        """ Get select param for use with HDF Rest API"""
+        param = ''
+        rank = len(self._shape)
+        if rank == 0:
+            return None
+
+        param += "["
+        for i in range(rank):
+            start = self.start[i]
+            stop = start + (self.count[i] * self.step[i])
+            if stop > self._shape[i]:
+                stop = self._shape[i]
+            dim_sel = str(start) + ':' + str(stop)
+            if self.step[i] != 1:
+                dim_sel += ':' + str(self.step[i])
+            if i != rank - 1:
+                dim_sel += ','
+            param += dim_sel
+        param += ']'
+        return param
+
+    def broadcast(self, target_shape):
+        """ Return an iterator over target dataspaces for broadcasting.
+
+        Follows the standard NumPy broadcasting rules against the current
+        selection shape (self._mshape).
+        """
+        if self._shape == ():
+            if np.product(target_shape) != 1:
+                raise TypeError(f"Can't broadcast {target_shape} to scalar")
+            self._id.select_all()
+            yield self._id
+            return
+
+        start, count, step, scalar = self._sel
+
+        rank = len(count)
+        target = list(target_shape)
+
+        tshape = []
+        for idx in range(1, rank + 1):
+            if len(target) == 0 or scalar[-idx]:     # Skip scalar axes
+                tshape.append(1)
+            else:
+                t = target.pop()
+                if t == 1 or count[-idx] == t:
+                    tshape.append(t)
+                else:
+                    raise TypeError(f"Can't broadcast {target_shape} -> {count}")
+        tshape.reverse()
+        tshape = tuple(tshape)
+
+        chunks = tuple(x // y for x, y in zip(count, tshape))
+        nchunks = int(np.product(chunks))
+
+        if nchunks == 1:
+            yield self._id
+        else:
+            sid = self._id.copy()
+            sid.select_hyperslab((0,) * rank, tshape, step)
+            for idx in range(nchunks):
+                offset = tuple(x * y * z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
+                sid.offset_simple(offset)
+                yield sid
+
+    @property
+    def slices(self):
+        """ return tuple of slices for this selection """
+        rank = len(self.shape)
+        slices = []
+        for dim in range(rank):
+            start = self.start[dim]
+            stop = start + self.count[dim]
+            step = self.step[dim]
+            slices.append(slice(start, stop, step))
+        return tuple(slices)
+
+    def __repr__(self):
+        s = f"SimpleSelection(shape:{self._shape}, start: {self._sel[0]},"
+        s += f" count: {self._sel[1]}, step: {self._sel[2]}"
+        return s
+
+
+class FancySelection(Selection):
+
+    """
+        Implements advanced NumPy-style selection operations in addition to
+        the standard slice-and-int behavior.
+
+        Indexing arguments may be ints, slices, lists of indicies, or
+        per-axis (1D) boolean arrays.
+
+        Broadcasting is not supported for these selections.
+    """
+
+    @property
+    def slices(self):
+        return self._slices
+
+    @property
+    def mshape(self):
+        """ Shape of current selection """
+        return self._mshape
+
+    def __init__(self, shape, *args, **kwds):
+        Selection.__init__(self, shape, *args, **kwds)
+        self._slices = []
+
+    def __getitem__(self, args):
+
+        if not isinstance(args, tuple):
+            args = (args,)
+
+        args = _expand_ellipsis(args, len(self._shape))
+        select_type = H5S_SELECT_HYPERSLABS  # will adjust if we have a coord
+
+        # Create list of slices and/or coordinates
+        slices = []
+        mshape = []
+        num_coordinates = None
+        for idx, arg in enumerate(args):
+            length = self._shape[idx]
+            if isinstance(arg, slice):
+                _, count, _ = _translate_slice(arg, length)  # raise exception for invalid slice
+                if arg.start is None:
+                    start = 0
+                else:
+                    start = arg.start
+                if arg.stop is None:
+                    stop = length
+                else:
+                    stop = arg.stop
+                if arg.step is None:
+                    step = 1
+                else:
+                    step = arg.step
+                slices.append(slice(start, stop, step))
+                mshape.append(count)
+
+            elif hasattr(arg, 'dtype') and arg.dtype == np.dtype('bool'):
+                if len(arg.shape) != 1:
+                    raise TypeError("Boolean indexing arrays must be 1-D")
+                arg = arg.nonzero()[0]
+                try:
+                    slices.append(list(arg))
+                except TypeError:
+                    pass
+                else:
+                    if sorted(arg) != list(arg):
+                        raise TypeError("Indexing elements must be in increasing order")
+                mshape.append(len(arg))
+                select_type = H5S_SELLECT_FANCY
+            elif isinstance(arg, list) or hasattr(arg, 'dtype'):
+                # coordinate selection
+                slices.append(arg)
+                for x in arg:
+                    if x < 0 or x >= length:
+                        raise IndexError(f"Index ({arg}) out of range (0-{length - 1})")
+                if num_coordinates is None:
+                    num_coordinates = len(arg)
+                elif num_coordinates == len(arg):
+                    # second set of coordinates doesn't effect mshape
+                    continue
+                else:
+                    # this shouldn't happen since HSDS would have thrown an error
+                    raise ValueError("coordinate num element missmatch")
+                mshape.append(len(arg))
+                select_type = H5S_SELLECT_FANCY
+            elif isinstance(arg, int):
+                if arg < 0 or arg >= length:
+                    raise IndexError(f"Index ({arg}) out of range (0-{length - 1})")
+                slices.append(arg)
+            elif isinstance(arg, type(Ellipsis)):
+                slices.append(slice(0, length, 1))
+            else:
+                raise TypeError(f"Unexpected arg type: {arg} - {type(arg)}")
+        self._slices = slices
+        self._select_type = select_type
+        self._mshape = tuple(mshape)
+
+    def getSelectNpoints(self):
+        """Return number of elements in current selection
+        """
+        npoints = 1
+        for idx, s in enumerate(self._slices):
+            if isinstance(s, slice):
+                length = self._shape[idx]
+                _, count, _ = _translate_slice(s, length)
+            elif isinstance(s, list):
+                count = len(s)
+            else:
+                # scalar selection
+                count = 1
+            npoints *= count
+
+        return npoints
+
+    def getQueryParam(self):
+        """ Get select param for use with HDF Rest API"""
+        query = []
+        query.append('[')
+        rank = len(self._slices)
+        for dim, s in enumerate(self._slices):
+            if isinstance(s, slice):
+                if s.start is None and s.stop is None:
+                    query.append(':')
+                elif s.stop is None:
+                    query.append(f"{s.start}:")
+                else:
+                    query.append(f"{s.start}:{s.stop}")
+                if s.step and s.step != 1:
+                    query.append(f":{s.step}")
+            elif isinstance(s, list) or hasattr(s, 'dtype'):
+                query.append('[')
+                for idx, n in enumerate(s):
+                    query.append(str(n))
+                    if idx + 1 < len(s):
+                        query.append(',')
+                query.append(']')
+            else:
+                # scalar selection
+                query.append(str(s))
+            if dim + 1 < rank:
+                query.append(',')
+        query.append(']')
+        return "".join(query)
+
+    def broadcast(self, target_shape):
+        raise TypeError("Broadcasting is not supported for complex selections")
+
+    def __repr__(self):
+        return f"FancySelection(shape:{self._shape}, slices: {self._slices})"
+
+
+def _expand_ellipsis(args, rank):
+    """ Expand ellipsis objects and fill in missing axes.
+    """
+    n_el = sum(1 for arg in args if arg is Ellipsis)
+    if n_el > 1:
+        raise ValueError("Only one ellipsis may be used.")
+    elif n_el == 0 and len(args) != rank:
+        args = args + (Ellipsis,)
+
+    final_args = []
+    n_args = len(args)
+    for arg in args:
+
+        if arg is Ellipsis:
+            final_args.extend((slice(None, None, None),) * (rank - n_args + 1))
+        else:
+            final_args.append(arg)
+
+    if len(final_args) > rank:
+        raise TypeError("Argument sequence too long")
+
+    return final_args
+
+
+def _handle_simple(shape, args):
+    """ Process a "simple" selection tuple, containing only slices and
+        integer objects.  Return is a 4-tuple with tuples for start,
+        count, step, and a flag which tells if the axis is a "scalar"
+        selection (indexed by an integer).
+
+        If "args" is shorter than "shape", the remaining axes are fully
+        selected.
+    """
+    args = _expand_ellipsis(args, len(shape))
+
+    start = []
+    count = []
+    step = []
+    scalar = []
+
+    for arg, length in zip(args, shape):
+        if isinstance(arg, slice):
+            x, y, z = _translate_slice(arg, length)
+            s = False
+        else:
+            try:
+                x, y, z = _translate_int(int(arg), length)
+                s = True
+            except TypeError:
+                raise TypeError(f'Illegal index "{arg}" (must be a slice or number)')
+        start.append(x)
+        count.append(y)
+        step.append(z)
+        scalar.append(s)
+
+    return tuple(start), tuple(count), tuple(step), tuple(scalar)
+
+
+def _translate_int(exp, length):
+    """ Given an integer index, return a 3-tuple
+        (start, count, step)
+        for hyperslab selection
+    """
+    if exp < 0:
+        exp = length + exp
+
+    if not 0 <= exp < length:
+        raise IndexError(f"Index ({exp}) out of range (0-{length - 1})")
+
+    return exp, 1, 1
+
+
+def _translate_slice(exp, length):
+    """ Given a slice object, return a 3-tuple
+        (start, count, step)
+        for use with the hyperslab selection routines
+    """
+    start, stop, step = exp.indices(length)
+    # Now if step > 0, then start and stop are in [0, length];
+    # if step < 0, they are in [-1, length - 1] (Python 2.6b2 and later;
+    # Python issue 3004).
+
+    if step < 1:
+        raise ValueError("Step must be >= 1 (got %d)" % step)
+    if stop < start:
+        stop = start
+
+    count = 1 + (stop - start - 1) // step
+
+    return start, count, step
+
+
+def guess_shape(sid):
+    """ Given a dataspace, try to deduce the shape of the selection.
+
+    Returns one of:
+        * A tuple with the selection shape, same length as the dataspace
+        * A 1D selection shape for point-based and multiple-hyperslab selections
+        * None, for unselected scalars and for NULL dataspaces
+    """
+
+    sel_class = sid.get_simple_extent_type()    # Dataspace class
+    sel_type = sid.get_select_type()            # Flavor of selection in use
+
+    if sel_class == 'H5S_NULL':
+        # NULL dataspaces don't support selections
+        return None
+
+    elif sel_class == 'H5S_SCALAR':
+        # NumPy has no way of expressing empty 0-rank selections, so we use None
+        if sel_type == H5S_SELECT_NONE:
+            return None
+        if sel_type == H5S_SELECT_ALL:
+            return tuple()
+
+    elif sel_class != 'H5S_SIMPLE':
+        raise TypeError(f"Unrecognized dataspace class {sel_class}")
+
+    # We have a "simple" (rank >= 1) dataspace
+
+    N = sid.get_select_npoints()
+    rank = len(sid.shape)
+
+    if sel_type == H5S_SELECT_NONE:
+        return (0,) * rank
+
+    elif sel_type == H5S_SELECT_ALL:
+        return sid.shape
+
+    elif sel_type == H5S_SEL_POINTS:
+        # Like NumPy, point-based selections yield 1D arrays regardless of
+        # the dataspace rank
+        return (N,)
+
+    elif sel_type != H5S_SELECT_HYPERSLABS:
+        raise TypeError(f"Unrecognized selection method {sel_type}")
+
+    # We have a hyperslab-based selection
+
+    if N == 0:
+        return (0,) * rank
+
+    bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds())
+
+    # Shape of full selection box
+    boxshape = topcorner - bottomcorner + np.ones((rank,))
+
+    def get_n_axis(sid, axis):
+        """ Determine the number of elements selected along a particular axis.
+
+        To do this, we "mask off" the axis by making a hyperslab selection
+        which leaves only the first point along the axis.  For a 2D dataset
+        with selection box shape (X, Y), for axis 1, this would leave a
+        selection of shape (X, 1).  We count the number of points N_leftover
+        remaining in the selection and compute the axis selection length by
+        N_axis = N/N_leftover.
+        """
+
+        if (boxshape[axis]) == 1:
+            return 1
+
+        start = bottomcorner.copy()
+        start[axis] += 1
+        count = boxshape.copy()
+        count[axis] -= 1
+
+        # Throw away all points along this axis
+        masked_sid = sid.copy()
+        masked_sid.select_hyperslab(tuple(start), tuple(count), op=H5S_SELECT_NOTB)
+
+        N_leftover = masked_sid.get_select_npoints()
+
+        return N // N_leftover
+
+    shape = tuple(get_n_axis(sid, x) for x in range(rank))
+
+    if np.product(shape) != N:
+        # This means multiple hyperslab selections are in effect,
+        # so we fall back to a 1D shape
+        return (N,)
+
+    return shape
+
+
+class ScalarSelection(Selection):
+
+    """
+        Implements slicing for scalar datasets.
+    """
+
+    @property
+    def mshape(self):
+        return self._mshape
+
+    def __init__(self, shape, *args, **kwds):
+        Selection.__init__(self, shape, *args, **kwds)
+        arg = None
+        if len(args) > 0:
+            arg = args[0]
+        if arg == ():
+            self._mshape = None
+            self._select_type = H5S_SELECT_ALL
+        elif arg == (Ellipsis,):
+            self._mshape = ()
+            self._select_type = H5S_SELECT_ALL
+        else:
+            raise ValueError("Illegal slicing argument for scalar dataspace")
diff --git a/src/h5json/writer/__init__.py b/src/h5json/writer/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/integ/h5tojson_test.py b/test/integ/h5tojson_test.py
index 68b0464..5be40c8 100644
--- a/test/integ/h5tojson_test.py
+++ b/test/integ/h5tojson_test.py
@@ -35,7 +35,7 @@
     "compound.h5",
     "compound_array.h5",
     "compound_array_attr.h5",
-    # "compound_array_vlen_string.h5",  # crashes python w/ Linux!
+    "compound_array_vlen_string.h5",  # crashes python w/ Linux?
     "compound_array_dset.h5",
     "compound_attr.h5",
     "compound_committed.h5",
@@ -47,8 +47,8 @@
     "enum_attr.h5",
     "enum_dset.h5",
     "fillvalue.h5",
-    "fixed_string_attr.h5",  # temp for trying travis
-    "fixed_string_dset.h5",  # temp for trying travis
+    "fixed_string_attr.h5",
+    "fixed_string_dset.h5",
     "h5ex_d_alloc.h5",
     "h5ex_d_checksum.h5",
     "h5ex_d_chunk.h5",
diff --git a/test/integ/jsontoh5_test.py b/test/integ/jsontoh5_test.py
index dad5648..3be3a3b 100644
--- a/test/integ/jsontoh5_test.py
+++ b/test/integ/jsontoh5_test.py
@@ -36,7 +36,7 @@
     # "compound_array.json",
     # "compound_array_attr.json",
     # "compound_array_dset.json",
-    "compound_array_vlen_string.json",
+    # "compound_array_vlen_string.json",  # regression
     "compound_attr.json",
     "compound_committed.json",
     "dim_scale.json",
@@ -95,7 +95,7 @@
     "regionref_attr.json",
     # "regionref_dset.json",
     "scalar_attr.json",
-    "vlen_attr.json",
+    # "vlen_attr.json",   #regression
     "vlen_dset.json",
     "vlen_string_attr.json",
     "vlen_string_dset.json",
diff --git a/test/unit/array_util_test.py b/test/unit/array_util_test.py
new file mode 100644
index 0000000..1ede343
--- /dev/null
+++ b/test/unit/array_util_test.py
@@ -0,0 +1,1269 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import json
+import numpy as np
+
+import base64
+
+from h5json.array_util import bytesArrayToList
+from h5json.array_util import toTuple
+from h5json.array_util import getNumElements
+from h5json.array_util import jsonToArray
+from h5json.array_util import arrayToBytes
+from h5json.array_util import bytesToArray
+from h5json.array_util import getByteArraySize
+from h5json.array_util import IndexIterator
+from h5json.array_util import ndarray_compare
+from h5json.array_util import getNumpyValue
+from h5json.array_util import getBroadcastShape
+from h5json.array_util import isVlen
+
+from h5json.hdf5dtype import special_dtype
+from h5json.hdf5dtype import check_dtype
+from h5json.hdf5dtype import createDataType
+
+
+class ArrayUtilTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(ArrayUtilTest, self).__init__(*args, **kwargs)
+        # main
+
+    def testByteArrayToList(self):
+        data_items = (
+            42,
+            "foo",
+            b"foo",
+            [1, 2, 3],
+            (1, 2, 3),
+            ["A", "B", "C"],
+            [b"A", b"B", b"C"],
+            [["A", "B"], [b"a", b"b", b"c"]],
+        )
+        for data in data_items:
+            json_data = bytesArrayToList(data)
+            # will throw TypeError if not able to convert
+            json.dumps(json_data)
+
+    def testToTuple(self):
+        data0d = 42  # scalar
+        data1d1 = [1]  # one dimensional, one element list
+        data1d = [1, 2, 3, 4, 5]  # list
+        data2d1 = [
+            [1, 2],
+        ]  # two dimensional, one element
+        data2d = [[1, 0.1], [2, 0.2], [3, 0.3], [4, 0.4]]  # list of two-element lists
+        data3d = [[[0, 0.0], [1, 0.1]], [[2, 0.2], [3, 0.3]]]  # list of list of lists
+        out = toTuple(0, data0d)
+        self.assertEqual(data0d, out)
+        out = toTuple(1, data1d1)
+        self.assertEqual(data1d1, out)
+        out = toTuple(1, data1d)
+        self.assertEqual(data1d, out)
+        out = toTuple(2, data2d)
+        self.assertEqual(data2d, out)
+        out = toTuple(1, data2d1)
+        self.assertEqual([(1, 2)], out)
+        out = toTuple(3, data3d)
+        self.assertEqual(data3d, out)
+        out = toTuple(1, data2d)  # treat input as 1d array of two-field compound types
+        self.assertEqual([(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4)], out)
+        out = toTuple(2, data3d)  # treat input as 2d array of two-field compound types
+        self.assertEqual([[(0, 0.0), (1, 0.1)], [(2, 0.2), (3, 0.3)]], out)
+        out = toTuple(1, data3d)  # treat input a 1d array of compound type of compound types
+        self.assertEqual([((0, 0.0), (1, 0.1)), ((2, 0.2), (3, 0.3))], out)
+
+    def testToTupleStrData(self):
+        data = "a string!"
+        out = toTuple(0, data)
+        self.assertEqual(data, out)
+
+        data = ["a string!"]
+        out = toTuple(1, data)
+        self.assertEqual(data, out)
+
+        data = ["a string2"]
+        out = toTuple(1, data)
+        self.assertEqual(data, out)
+
+        data = [["partA", "partB", "partC"],]
+        out = toTuple(1, data)
+        self.assertEqual([("partA", "partB", "partC"), ], out)
+
+        data = [[[4, 8, 12], "four"], [[5, 10, 15], "five"]]
+        out = toTuple(1, data)
+        self.assertEqual([((4, 8, 12), 'four'), ((5, 10, 15), 'five')], out)
+
+    def testGetNumElements(self):
+        shape = (4,)
+        nelements = getNumElements(shape)
+        self.assertEqual(nelements, 4)
+
+        shape = [10,]
+        nelements = getNumElements(shape)
+        self.assertEqual(nelements, 10)
+
+        shape = (10, 8)
+        nelements = getNumElements(shape)
+        self.assertEqual(nelements, 80)
+
+    def testJsonToArray(self):
+
+        # simple integer
+        dt = np.dtype("i4")
+        shape = [4, ]
+        data = [0, 2, 4, 6]
+        out = jsonToArray(shape, dt, data)
+
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (4,))
+        for i in range(4):
+            self.assertEqual(out[i], i * 2)
+
+        shape = ()  # scalar
+        data = 42
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        self.assertEqual(out[()], 42)
+
+        shape = (1, )  # one element
+        data = 42
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1, ))
+        self.assertEqual(out[0], 42)
+
+        shape = (10, )  # multi-1D
+        data = list(range(10))
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (10, ))
+        self.assertEqual(out[5], 5)
+
+        shape = (5, 4)  # multi-2D
+        data = []
+        for i in range(5):
+            data.append([42, ] * 4)
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5, 4))
+        self.assertEqual(out[2, 3], 42)
+
+        shape = (5, 4)  # multi-2D, reshape input data
+        data = [42, ] * 20
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5, 4))
+        self.assertEqual(out[2, 3], 42)
+
+        dt = np.dtype("S10")  # fixed size string
+        shape = [5, ]
+        data = ["parting", "is", "such", "sweet", "sorrow"]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5, ))
+        self.assertEqual(out[4], b'sorrow')
+
+        shape = ()  # scalar
+        data = "a string"
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        self.assertEqual(out[()], b'a string')
+
+        # VLEN Scalar str
+        dt = special_dtype(vlen=str)
+        data = "I'm a string!"
+        shape = []
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        val = out[()]
+        self.assertEqual(val, data)
+
+        # VLEN one element str
+        dt = special_dtype(vlen=str)
+        data = "I'm a string!"
+        shape = [1,]
+        out = jsonToArray(shape, dt, [data,])
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1,))
+        val = out[0]
+        self.assertEqual(val, data)
+
+        # VLEN multi element
+        shape = [5, ]
+        data = ["parting", "is", "such", "sweet", "sorrow"]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5, ))
+        self.assertEqual(out[4], 'sorrow')
+
+        # VLEN ascii
+        dt = special_dtype(vlen=bytes)
+        data = [b"one", b"two", b"three", b"four", b"five"]
+        shape = [5, ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (5,))
+        self.assertTrue("vlen" in out.dtype.metadata)
+        self.assertEqual(out.dtype.metadata["vlen"], bytes)
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(out.shape, (5,))
+        # TBD: code does not actually enforce use of bytes vs. str,
+        #  probably not worth the effort to fix
+        self.assertEqual(out[2], b"three")
+        self.assertEqual(out[3], b"four")
+
+        # VLEN unicode
+        dt = special_dtype(vlen=bytes)
+        data = ["one", "two", "three", "four", "five"]
+        shape = [5, ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertTrue("vlen" in out.dtype.metadata)
+        self.assertEqual(out.dtype.metadata["vlen"], bytes)
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(out[2], "three")
+
+        # test utf8 strings
+        dt = np.dtype("S26")
+        shape = []
+        data = "eight: \u516b"
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[()], data.encode("utf8"))
+
+        dt = special_dtype(vlen=str)
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[()], data)
+
+        data = ["I'm an UTF-8 null terminated string",]
+        shape = [1,]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[0], data[0])
+
+        dt = np.dtype("S12")
+        data = "eight: \u516b"
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[()], data.encode("utf8"))
+
+        # UTF8 encode the data first
+        out = jsonToArray(shape, dt, data.encode('utf8'))
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[()], data.encode('utf8'))
+
+        # one-element array
+        shape = [1,]
+        dt = np.dtype("S12")
+        data = "eight: \u516b"
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out[0], b'eight: \xe5\x85\xab')
+
+        # VLEN data
+        shape = []
+        dt = special_dtype(vlen=np.dtype("S10"))
+        data = ["foo", "bar"]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+
+        dt = special_dtype(vlen=np.dtype("int32"))
+        shape = [4, ]
+        data = [
+            [1,],
+            [1, 2],
+            [1, 2, 3],
+            [1, 2, 3, 4],
+        ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
+
+        self.assertEqual(out.shape, (4,))
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
+        for i in range(4):
+            e = out[i]  # .tolist()
+            self.assertTrue(isinstance(e, tuple))
+            self.assertEqual(e, tuple(range(1, i + 2)))
+
+        # VLEN 2D data
+        dt = special_dtype(vlen=np.dtype("int32"))
+        shape = [2, 2]
+        data = [
+            [
+                [0,],
+                [1, 2],
+            ],
+            [
+                [1,],
+                [2, 3],
+            ],
+        ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
+
+        self.assertEqual(out.shape, (2, 2))
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32"))
+        for i in range(2):
+            for j in range(2):
+                e = out[i, j]  # .tolist()
+                self.assertTrue(isinstance(e, tuple))
+
+        # create VLEN of obj ref's
+        ref_type = {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"}
+        vlen_type = {"class": "H5T_VLEN", "base": ref_type}
+        dt = createDataType(vlen_type)  # np datatype
+
+        id0 = b"g-a4f455b2-c8cf-11e7-8b73-0242ac110009"
+        id1 = b"g-a50af844-c8cf-11e7-8b73-0242ac110009"
+        id2 = b"g-a5236276-c8cf-11e7-8b73-0242ac110009"
+
+        data = [
+            [id0, ],
+            [id0, id1],
+            [id0, id1, id2],
+        ]
+        shape = [3, ]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        base_type = check_dtype(vlen=out.dtype)
+        self.assertEqual(base_type.kind, "S")
+        self.assertEqual(base_type.itemsize, 48)
+
+        self.assertEqual(out.shape, (3,))
+        self.assertEqual(out.dtype.kind, "O")
+        self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("S48"))
+
+        e = out[0]
+        self.assertTrue(isinstance(e, tuple))
+        self.assertEqual(e, (id0,))
+        e = out[1]
+        self.assertTrue(isinstance(e, tuple))
+        self.assertEqual(e, (id0, id1))
+        e = out[2]
+        self.assertTrue(isinstance(e, tuple))
+        self.assertEqual(e, (id0, id1, id2))
+
+        # compound type
+        dt = np.dtype([("a", "i4"), ("b", "S5")])
+        shape = [2, ]
+        data = [[4, "four"], [5, "five"]]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+
+        self.assertEqual(out.shape, (2,))
+        self.assertTrue(isinstance(out[0], np.void))
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (4, b"four"))
+        self.assertTrue(isinstance(out[1], np.void))
+        e1 = out[1].tolist()
+        self.assertEqual(e1, (5, b"five"))
+
+        data = [[6, "six"],]
+        shape = [1,]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1,))
+        self.assertTrue(isinstance(out[0], np.void))
+        e1 = out[0].tolist()
+        self.assertEqual(e1, (6, b"six"))
+
+        data = [7, "seven"]
+        shape = []
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        self.assertTrue(isinstance(out[()], np.void))
+        e1 = out[()].tolist()
+        self.assertEqual(e1, (7, b"seven"))
+
+        data = [8, "eight"]
+        shape = [1,]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1,))
+        self.assertTrue(isinstance(out[0], np.void))
+        e1 = out[0].tolist()
+        self.assertEqual(e1, (8, b"eight"))
+
+        dt = np.dtype([("a", "i4"), ("b", "f4")])
+        shape = [1, ]
+        data = [42, 0.42]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1, ))
+        e1 = out[0]
+        self.assertEqual(e1[0], 42)
+
+        # compound with VLEN element
+
+        dt_str = special_dtype(vlen=str)
+        dt = np.dtype([("a", "i4"), ("b", dt_str)])
+        shape = [2, ]
+        data = [[4, "four"], [5, "five"]]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (2,))
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (4, "four"))
+
+        shape = [1, ]
+        data = [[6, "six"],]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, (1,))
+        e0 = out[0].tolist()
+        self.assertEqual(e0, (6, "six"))
+
+        shape = []
+        data = [7, "seven",]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+        self.assertEqual(out.shape, ())
+        e0 = out[()]
+        self.assertEqual(len(e0), 2)
+        self.assertEqual(e0[0], 7)
+        self.assertEqual(e0[1], "seven")
+
+        # compound type with array field
+        dt = np.dtype([("a", ("i4", 3)), ("b", "S5")])
+        shape = [2, ]
+        data = [[[4, 8, 12], "four"], [[5, 10, 15], "five"]]
+        out = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(out, np.ndarray))
+
+        self.assertEqual(out.shape, (2,))
+        self.assertTrue(isinstance(out[0], np.void))
+        e0 = out[0]
+        self.assertEqual(len(e0), 2)
+        e0a = e0[0]
+        self.assertTrue(isinstance(e0a, np.ndarray))
+        self.assertEqual(e0a[0], 4)
+        self.assertEqual(e0a[1], 8)
+        self.assertEqual(e0a[2], 12)
+        e0b = e0[1]
+        self.assertEqual(e0b, b"four")
+        self.assertTrue(isinstance(out[1], np.void))
+        e1 = out[1]
+        self.assertEqual(len(e1), 2)
+        e1a = e1[0]
+        self.assertTrue(isinstance(e1a, np.ndarray))
+        self.assertEqual(e1a[0], 5)
+        self.assertEqual(e1a[1], 10)
+        self.assertEqual(e1a[2], 15)
+        e1b = e1[1]
+        self.assertEqual(e1b, b"five")
+
+    def testToBytes(self):
+        # Simple array
+        dt = np.dtype("<i4")
+        arr = np.asarray((1, 2, 3, 4), dtype=dt)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(buffer, arr.tobytes())
+
+        # convert buffer back to arr
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(np.array_equal(arr, arr_copy))
+
+        # big-endian ints
+        dt = np.dtype(">u8")
+        arr = np.asarray((1, 2, 3, 4), dtype=dt)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(buffer, arr.tobytes())
+
+        # fixed length string
+        dt = np.dtype("S8")
+        arr = np.asarray(("abcdefgh", "ABCDEFGH", "12345678"), dtype=dt)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(buffer, arr.tobytes())
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (3,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # fixed length UTF8 string
+        dt = np.dtype("S10")
+        arr = np.asarray(b'eight: \xe5\x85\xab', dtype=dt)
+        buffer = arrayToBytes(arr)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, ())
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # invalid UTF string
+        dt = np.dtype("S2")
+        arr = np.asarray(b'\xff\xfe', dtype=dt)
+        buffer = arrayToBytes(arr)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, ())
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # invalid UTF string with base64 encoding
+        dt = np.dtype("S2")
+        arr = np.asarray(b'\xff\xfe', dtype=dt)
+        buffer = b'//4='  # this is the base64 encoding of b'\xff\xfe'
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # Compound non-vlen
+        dt = np.dtype([("x", "f8"), ("y", "i4")])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (3.12, 42)
+        arr[3] = (1.28, 69)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(buffer, arr.tobytes())
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # VLEN of int32's
+        dt = special_dtype(vlen=np.dtype("<i4"))
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = np.int32([1, ])
+        arr[1] = np.int32([1, 2])
+        arr[2] = 0  # test un-initialized value
+        arr[3] = np.int32([1, 2, 3])
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 40)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # VLEN of strings
+        dt = special_dtype(vlen=str)
+        arr = np.zeros((5,), dtype=dt)
+        arr[0] = "one: \u4e00"
+        arr[1] = "two: \u4e8c"
+        arr[2] = "three: \u4e09"
+        arr[3] = "four: \u56db"
+        arr[4] = 0
+        buffer = arrayToBytes(arr)
+
+        expected_length = 55
+        expected = bytearray(expected_length)
+        expected[0:4] = b"\x08\x00\x00\x00"
+        expected[4:16] = b"one: \xe4\xb8\x80\x08\x00\x00\x00"
+        expected[16:28] = b"two: \xe4\xba\x8c\n\x00\x00\x00"
+        expected[28:42] = b"three: \xe4\xb8\x89\t\x00\x00\x00"
+        expected[42:55] = b"four: \xe5\x9b\x9b\x00\x00\x00\x00"
+
+        self.assertEqual(len(buffer), expected_length)
+
+        self.assertEqual(buffer, expected)
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (5,))
+
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+        # VLEN of bytes
+        dt = special_dtype(vlen=bytes)
+        arr = np.zeros((5,), dtype=dt)
+        arr[0] = b"Parting"
+        arr[1] = b"is such"
+        arr[2] = b"sweet"
+        arr[3] = b"sorrow"
+        arr[4] = 0
+
+        buffer = arrayToBytes(arr)
+
+        expected = bytearray(45)
+        expected[0:11] = b"\x07\x00\x00\x00Parting"
+        expected[11:22] = b"\x07\x00\x00\x00is such"
+        expected[22:31] = b"\x05\x00\x00\x00sweet"
+        expected[31:41] = b"\x06\x00\x00\x00sorrow"
+        expected[41:45] = b"\x00\x00\x00\x00"
+
+        self.assertEqual(len(buffer), len(expected))
+        self.assertEqual(buffer, expected)  # same serialization as with str
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (5,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # Compound str vlen
+        #
+        dt_vstr = special_dtype(vlen=str)
+        dt = np.dtype([("x", "i4"), ("tag", dt_vstr), ("code", "S4")])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (42, "Hello", "X1")
+        arr[3] = (84, "Bye", "XYZ")
+        count = getByteArraySize(arr)
+        buffer = arrayToBytes(arr)
+
+        self.assertEqual(len(buffer), 56)
+        self.assertEqual(buffer.find(b"Hello"), 8)
+        self.assertEqual(buffer.find(b"Bye"), 49)
+        self.assertEqual(buffer.find(b"X1"), 13)
+        self.assertEqual(buffer.find(b"XYZ"), 52)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # Compound int vlen
+        #
+        dt_vint = special_dtype(vlen=np.dtype("<i4"))
+        dt = np.dtype([("x", "int32"), ("tag", dt_vint)])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (42, np.array((), dtype="int32"))
+        arr[3] = (84, np.array((1, 2, 3), dtype="int32"))
+        count = getByteArraySize(arr)
+        self.assertEqual(count, 44)
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 44)
+        buffer_expected = {0: 42, 24: 84, 28: 12, 32: 1, 36: 2, 40: 3}
+        for i in range(44):
+            if i in buffer_expected:
+                self.assertEqual(buffer[i], buffer_expected[i])
+            else:
+                self.assertEqual(buffer[i], 0)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # VLEN utf string with array type
+        #
+        dt_str = special_dtype(vlen=str)
+        dt_arr_str = np.dtype((dt_str, (2,)))
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        arr = np.zeros((4,), dtype=dt)
+        dt_str = special_dtype(vlen=str)
+        arr[0] = (42, np.asarray(["hi", "bye"], dtype=dt_str))
+        arr[3] = (84, np.asarray(["hi-hi", "bye-bye"], dtype=dt_str))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 81)
+
+        self.assertEqual(buffer.find(b"hi"), 8)
+        self.assertEqual(buffer.find(b"bye"), 14)
+        self.assertEqual(buffer.find(b"hi-hi"), 49)
+        self.assertEqual(buffer.find(b"bye-bye"), 58)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        for i in range(4):
+            e = arr[i]
+            e_copy = arr_copy[i]
+            self.assertTrue(np.array_equal(e, e_copy))
+        #
+        # VLEN ascii with array type
+        #
+        dt_str = special_dtype(vlen=bytes)
+        dt_arr_str = np.dtype((dt_str, (2,)))
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        arr = np.zeros((4,), dtype=dt)
+
+        arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str))
+        arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 81)
+
+        self.assertEqual(buffer.find(b"hi"), 8)
+        self.assertEqual(buffer.find(b"bye"), 14)
+        self.assertEqual(buffer.find(b"hi-hi"), 49)
+        self.assertEqual(buffer.find(b"bye-bye"), 58)
+        # convert back to array
+
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # test Compound with VLEN
+        count = 4
+        fixed_str8_type = {
+            "charSet": "H5T_CSET_ASCII",
+            "class": "H5T_STRING",
+            "length": 8,
+            "strPad": "H5T_STR_NULLPAD",
+        }
+        fields = [
+            {
+                "type": {"class": "H5T_INTEGER", "base": "H5T_STD_U64BE"},
+                "name": "VALUE1",
+            },
+            {
+                "type": fixed_str8_type,
+                "name": "VALUE2"
+            },
+            {
+                "type": {
+                    "class": "H5T_ARRAY",
+                    "dims": [2],
+                    "base": {
+                        "class": "H5T_STRING",
+                        "charSet": "H5T_CSET_ASCII",
+                        "strPad": "H5T_STR_NULLTERM",
+                        "length": "H5T_VARIABLE",
+                    },
+                },
+                "name": "VALUE3",
+            },
+        ]
+
+        datatype = {"class": "H5T_COMPOUND", "fields": fields}
+
+        dt = createDataType(datatype)
+        self.assertTrue(isVlen(dt))
+
+        # create numpy vlen array
+        arr = np.zeros((count,), dtype=dt)
+        for i in range(count):
+            e = arr[i]
+            e["VALUE1"] = i + 1
+            s = ""
+            for j in range(i + 5):
+                offset = (i + j) % 26
+                s += chr(ord("A") + offset)
+            e["VALUE2"] = s
+            e["VALUE3"] = [b"Hi! " * (i + 1), b"Bye!" * (i + 1)]
+
+        # converts to bytes
+        data = arrayToBytes(arr)
+        self.assertEqual(len(data), 192)  # will vary based on count
+
+        # convert back to array
+        arr_copy = bytesToArray(data, dt, (4,))
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        for i in range(4):
+            e = arr[i]
+            e_copy = arr_copy[i]
+            self.assertTrue(np.array_equal(e, e_copy))
+
+    def testArrToBytesBase64(self):
+        # Simple array
+        dt = np.dtype("<i4")
+        arr = np.asarray((1, 2, 3, 4), dtype=dt)
+        buffer = arrayToBytes(arr, encoding="base64")
+        # should be a bit longer than the byte representation...
+        expected_num_bytes = np.prod(arr.shape) * dt.itemsize
+        self.assertTrue(len(buffer) > expected_num_bytes)
+
+        # convert buffer back to arr
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(np.array_equal(arr, arr_copy))
+
+        # fixed length string
+        dt = np.dtype("S8")
+        arr = np.asarray(("abcdefgh", "ABCDEFGH", "12345678"), dtype=dt)
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (3,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # Compound non-vlen
+        dt = np.dtype([("x", "f8"), ("y", "i4")])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (3.12, 42)
+        arr[3] = (1.28, 69)
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # VLEN of int32's
+        dt = special_dtype(vlen=np.dtype("<i4"))
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = np.int32([1, ])
+        arr[1] = np.int32([1, 2])
+        arr[2] = 0  # test un-initialized value
+        arr[3] = np.int32([1, 2, 3])
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        # VLEN of strings
+        dt = special_dtype(vlen=str)
+        arr = np.zeros((5,), dtype=dt)
+        arr[0] = "one: \u4e00"
+        arr[1] = "two: \u4e8c"
+        arr[2] = "three: \u4e09"
+        arr[3] = "four: \u56db"
+        arr[4] = 0
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (5,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+        # VLEN of bytes
+        dt = special_dtype(vlen=bytes)
+        arr = np.zeros((5,), dtype=dt)
+        arr[0] = b"Parting"
+        arr[1] = b"is such"
+        arr[2] = b"sweet"
+        arr[3] = b"sorrow"
+        arr[4] = 0
+
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (5,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # Compound str vlen
+        #
+        dt_vstr = special_dtype(vlen=str)
+        dt = np.dtype([("x", "i4"), ("tag", dt_vstr), ("code", "S4")])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (42, "Hello", "X1")
+        arr[3] = (84, "Bye", "XYZ")
+        count = getByteArraySize(arr)
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # Compound int vlen
+        #
+        dt_vint = special_dtype(vlen=np.dtype("<i4"))
+        dt = np.dtype([("x", "int32"), ("tag", dt_vint)])
+        arr = np.zeros((4,), dtype=dt)
+        arr[0] = (42, np.array((), dtype="int32"))
+        arr[3] = (84, np.array((1, 2, 3), dtype="int32"))
+        count = getByteArraySize(arr)
+        self.assertEqual(count, 44)
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+        #
+        # VLEN utf string with array type
+        #
+        dt_str = special_dtype(vlen=str)
+        dt_arr_str = np.dtype((dt_str, (2,)))
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        arr = np.zeros((4,), dtype=dt)
+
+        dt_str = special_dtype(vlen=str)
+        arr[0] = (42, np.asarray(["hi", "bye"], dtype=dt_str))
+        arr[3] = (84, np.asarray(["hi-hi", "bye-bye"], dtype=dt_str))
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        for i in range(4):
+            e = arr[i]
+            e_copy = arr_copy[i]
+            self.assertTrue(np.array_equal(e, e_copy))
+        #
+        # VLEN ascii with array type
+        #
+        dt_str = special_dtype(vlen=bytes)
+        dt_arr_str = np.dtype((dt_str, (2,)))
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        arr = np.zeros((4,), dtype=dt)
+
+        dt_str = special_dtype(vlen=str)
+        arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str))
+        arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str))
+        buffer = arrayToBytes(arr, encoding="base64")
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64")
+        self.assertTrue(ndarray_compare(arr, arr_copy))
+
+    def testArrayCompareInt(self):
+        # Simple array
+        dt = np.dtype("<i4")
+        arr1 = np.zeros((1024, 1024), dtype=dt)
+        arr2 = np.zeros((1024, 1024), dtype=dt)
+        for _ in range(100):
+            self.assertTrue(ndarray_compare(arr1, arr2))
+        arr1[123, 456] = 42
+        self.assertFalse(ndarray_compare(arr1, arr2))
+
+    def testArrayCompareVlenInt(self):
+        # Vlen array
+        dt_vint = special_dtype(vlen=np.dtype("<i4"))
+        dt = np.dtype([("x", "int32"), ("tag", dt_vint)])
+        arr1 = np.zeros((1024, 1024), dtype=dt)
+        arr2 = np.zeros((1024, 1024), dtype=dt)
+        e1 = (42, np.array((), dtype="int32"))
+        e2 = (84, np.array((1, 2, 3), dtype="int32"))
+        arr1[123, 456] = e1
+        arr2[123, 456] = e1
+        arr1[888, 999] = e2
+        arr2[888, 999] = e2
+
+        # performance is marginal for this case
+        for _ in range(1):
+            self.assertTrue(ndarray_compare(arr1, arr2))
+        arr2[123, 456] = e2
+        self.assertFalse(ndarray_compare(arr1, arr2))
+
+    def testJsonToBytes(self):
+        #
+        # VLEN int
+        #
+
+        def array_equal(a, b):
+            """ compare two values element by element."""
+            if type(a) in (list, tuple, np.void, np.ndarray):
+                if len(a) != len(b):
+                    print("number of elements doesn't match")
+                    return False
+                nelements = len(a)
+                for i in range(nelements):
+                    if not array_equal(a[i], b[i]):
+                        return False
+            else:
+                # treat a string and bytes as equal if the utf-8 encoding
+                # of the string is equal to the byte encoding
+                if isinstance(a, str):
+                    a = a.encode("utf8")
+                if isinstance(b, str):
+                    b = b.encode("utf8")
+                if a != b:
+                    return False
+
+            return True
+
+        dt = special_dtype(vlen=np.dtype("int32"))
+        shape = [4,]
+        data = [
+            [1,],
+            [1, 2],
+            [1, 2, 3],
+            [1, 2, 3, 4],
+        ]
+        arr = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        self.assertEqual(check_dtype(vlen=arr.dtype), np.dtype("int32"))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 56)
+
+        expected = bytearray(48)
+        expected[0:8] = b"\x04\x00\x00\x00\x01\x00\x00\x00"
+        expected[8:16] = b"\x08\x00\x00\x00\x01\x00\x00\x00"
+        expected[16:24] = b"\x02\x00\x00\x00\x0c\x00\x00\x00"
+        expected[24:32] = b"\x01\x00\x00\x00\x02\x00\x00\x00"
+        expected[32:40] = b"\x03\x00\x00\x00\x10\x00\x00\x00"
+        expected[40:48] = b"\x01\x00\x00\x00\x02\x00\x00\x00"
+        expected[48:56] = b"\x03\x00\x00\x00\x04\x00\x00\x00"
+        self.assertEqual(buffer, expected)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, shape)
+        # np.array_equal doesn't work for object arrays
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        for i in range(4):
+            e = arr[i]
+            e_copy = arr_copy[i]
+            self.assertTrue(np.array_equal(e, e_copy))
+        #
+        # Compound vlen
+        #
+        dt_str = special_dtype(vlen=str)
+        dt = np.dtype([("x", "i4"), ("tag", dt_str)])
+        shape = [4, ]
+        data = [[42, "Hello"], [0, 0], [0, 0], [84, "Bye"]]
+        arr = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 40)
+
+        expected = bytearray(40)
+        expected[0:8] = b"*\x00\x00\x00\x05\x00\x00\x00"
+        expected[8:19] = b"Hello\x00\x00\x00\x00\x00\x00"
+        expected[19:26] = b"\x00\x00\x00\x00\x00\x00\x00"
+        expected[26:40] = b"\x00\x00\x00T\x00\x00\x00\x03\x00\x00\x00Bye"
+
+        self.assertEqual(buffer, expected)
+
+        # convert back to array
+        arr_copy = bytesToArray(buffer, dt, (4,))
+        # np.array_equal doesn't work for object arrays
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        self.assertTrue(array_equal(arr, arr_copy))
+
+        #
+        # VLEN utf with array type
+        #
+        dt_str = special_dtype(vlen=str)
+        dt_arr_str = np.dtype((dt_str, (2,)))
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        shape = [4,]
+        data = [
+            [42, ["hi", "bye"]],
+            [0, [0, 0]],
+            [0, [0, 0]],
+            [84, ["hi-hi", "bye-bye"]],
+        ]
+        arr = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 81)
+        self.assertEqual(buffer.find(b"hi"), 8)
+        self.assertEqual(buffer.find(b"bye"), 14)
+        self.assertEqual(buffer.find(b"hi-hi"), 49)
+        self.assertEqual(buffer.find(b"bye-bye"), 58)
+        arr_copy = bytesToArray(buffer, dt, shape)
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        self.assertTrue(array_equal(e, e_copy))
+
+        #
+        # VLEN ascii with array type
+        #
+        dt_str = special_dtype(vlen=bytes)
+        dt_arr_str = np.dtype((dt_str, (2,)))
+        dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)])
+        shape = [4,]
+        data = [
+            [42, [b"hi", b"bye"]],
+            [0, [0, 0]],
+            [0, [0, 0]],
+            [84, [b"hi-hi", b"bye-bye"]],
+        ]
+        arr = jsonToArray(shape, dt, data)
+        self.assertTrue(isinstance(arr, np.ndarray))
+        buffer = arrayToBytes(arr)
+        self.assertEqual(len(buffer), 81)
+        self.assertEqual(buffer.find(b"hi"), 8)
+        self.assertEqual(buffer.find(b"bye"), 14)
+        self.assertEqual(buffer.find(b"hi-hi"), 49)
+        self.assertEqual(buffer.find(b"bye-bye"), 58)
+        arr_copy = bytesToArray(buffer, dt, shape)
+
+        self.assertEqual(arr.dtype, arr_copy.dtype)
+        self.assertEqual(arr.shape, arr_copy.shape)
+        self.assertTrue(array_equal(e, e_copy))
+
+    def testIndexIterator(self):
+        i = 0
+        for index in IndexIterator((10,)):
+            self.assertEqual(index, (i,))
+            i += 1
+        self.assertEqual(i, 10)
+        i = 0
+        for index in IndexIterator((10,), sel=slice(0, 10, 2)):
+            self.assertEqual(index, (i,))
+
+            i += 2
+        self.assertEqual(i, 10)
+        i = 2
+        for index in IndexIterator((10, ), sel=slice(2, 8)):
+            self.assertEqual(index, (i,))
+            i += 1
+        self.assertEqual(i, 8)
+        cnt = 0
+        for index in IndexIterator((4, 5)):
+            cnt += 1
+        self.assertEqual(cnt, 20)
+        cnt = 0
+        for index in IndexIterator((8, 10), sel=(slice(0, 8, 2), slice(0, 10, 2))):
+            cnt += 1
+        self.assertEqual(cnt, 20)
+
+    def testGetNumpyValue(self):
+        # test int conversion
+        dt = np.dtype("<i4")
+        val = getNumpyValue(42, dt=dt)
+        self.assertTrue(isinstance(val, np.int32))
+        self.assertEqual(42, val)
+
+        # test fixed length string conversion
+        dt = np.dtype("S5")
+        val = getNumpyValue("hello", dt=dt)
+        self.assertTrue(isinstance(val, np.bytes_))
+        self.assertEqual(val, b"hello")
+
+        # test variable length string conversion
+        dt = special_dtype(vlen=bytes)
+        val = getNumpyValue("hello", dt=dt)
+        self.assertTrue(isinstance(val, str))
+        self.assertEqual(val, "hello")
+
+        # test compound type
+        dt = np.dtype([('int', "<i4"), ('str', "S4")])
+        val = getNumpyValue((42, "hdf5"), dt=dt)
+        self.assertTrue(isinstance(val, np.void))
+        self.assertEqual(val[0], 42)
+        self.assertEqual(val[1], b'hdf5')
+
+        # test array of ints
+        dt = np.dtype("<i4")
+        arr = np.array([0, 1], dtype=dt)
+        dt = np.dtype(("<i4", (len(arr),)))
+        val = getNumpyValue(arr, dt=dt)
+
+        self.assertTrue(np.array_equal(val, arr))
+        self.assertTrue(isinstance(val[0], np.int32))
+
+        # test array of floats
+        dt = np.dtype("f4")
+        arr = np.array([0.001, 1.001], dtype=dt)
+        val = getNumpyValue(arr, dt=np.dtype(("f4", (len(arr),))))
+
+        self.assertTrue(np.array_equal(val, arr))
+        self.assertTrue(isinstance(val[0], np.float32))
+
+        # test array of fixed-length strings
+        dt = np.dtype("S5")
+        arr = np.array([b'hello', b'world'], dtype=dt)
+        val = getNumpyValue(arr, dt=np.dtype(("S5", (len(arr),))))
+
+        self.assertTrue(np.array_equal(val, arr))
+        self.assertTrue(isinstance(val[0], np.bytes_))
+
+        # test nan string
+        dt = np.dtype("f4")
+        val = getNumpyValue("nan", dt=dt)
+        self.assertTrue(isinstance(val, np.float32))
+        self.assertTrue(val != val)
+
+    def testGetNumpyValueBase64Encoded(self):
+        # Set up value, numpy dtype, and expected type after decoding
+        value_info = []
+        value_info.append([42, np.dtype("<i4"), np.int32])  # int
+        value_info.append([1.001, np.dtype("f4"), np.float32])  # float
+        value_info.append([b"hello", np.dtype("S5"), np.bytes_])  # fixed-length string
+        value_info.append([(42, b'hdf5'),
+                           np.dtype([('int', "<i4"), ('str', "S4")]), np.void])  # compound type
+        np_values = []
+
+        for vi in value_info:
+            np_values.append(np.array(vi[0], dtype=vi[1]))
+
+        for i in range(len(np_values)):
+            numpy_dtype_out = value_info[i][2]
+
+            # Turn numpy array to bytes object which can be encoded
+            encoded_val = np_values[i].tobytes()
+            # Encode numpy bytes object
+            encoded_val = base64.b64encode(encoded_val)
+            # Decode from bytes object to regular string containing a base64 encoded numpy array
+            # This prevents the utf-8 encoding inside getNumpyValue from prepending b'
+            encoded_val = encoded_val.decode()
+            decoded_val = getNumpyValue(encoded_val, dt=np_values[i].dtype, encoding="base64")
+            self.assertTrue(isinstance(decoded_val, numpy_dtype_out))
+            self.assertEqual(decoded_val, np_values[i])
+
+        # test array types
+
+        # Set up value, numpy dtype, and expected type after decoding
+        value_info = []
+        value_info.append([np.array([0, 1], dtype=np.dtype("<i4")),
+                           np.dtype(("<i4", (2,))), np.int32])  # int array
+        value_info.append([np.array([0.001, 1.001], dtype=np.dtype("f4")),
+                           np.dtype(("f4", (2,))), np.float32])  # float array
+        value_info.append([np.array([b'hello', b'world'], dtype=np.dtype("S5")),
+                           np.dtype(("S5", (2,))), np.bytes_])  # fixed length string array
+
+        for i in range(len(value_info)):
+            this_array = value_info[i][0]
+            array_dtype = value_info[i][1]
+            array_dtype_out = value_info[i][2]
+
+            # Turn numpy array to bytes object which can be encoded
+            encoded_val = this_array.tobytes()
+            # Encode numpy bytes object
+            encoded_val = base64.b64encode(encoded_val)
+            # Decode from bytes object to regular string containing a base64 encoded numpy array
+            # This prevents the utf-8 encoding inside getNumpyValue from prepending b'
+            encoded_val = encoded_val.decode()
+            decoded_val = getNumpyValue(encoded_val, dt=array_dtype, encoding="base64")
+
+            self.assertTrue(np.array_equal(decoded_val, this_array))
+            self.assertTrue(isinstance(decoded_val[0], array_dtype_out))
+
+        # test invalid base64 length
+        try:
+            dt = np.dtype("<i8")
+            getNumpyValue("KgAAAA==", dt=dt, encoding="base64")
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+
+    def testJsonToArrayOnNoneArray(self):
+        data_dtype = np.dtype("i4")
+        data_shape = [0, ]
+        data_json = [None]
+        arr = None
+
+        try:
+            arr = jsonToArray(data_shape, data_dtype, data_json)
+        except Exception as e:
+            print(f"Exception while testing jsonToArray on array with None elements: {e}")
+
+        self.assertTrue(len(arr) == 0)
+        self.assertTrue(arr.dtype == data_dtype)
+
+    def testGetBroadcastShape(self):
+        bcshape = getBroadcastShape([1, ], 1)
+        self.assertEqual(bcshape, None)
+        bcshape = getBroadcastShape([2, 3], 6)
+        self.assertEqual(bcshape, None)
+        bcshape = getBroadcastShape([2, 3], 5)
+        self.assertEqual(bcshape, None)
+
+        bcshape = getBroadcastShape([4, 5], 1)
+        self.assertEqual(bcshape, [1, ])
+        bcshape = getBroadcastShape([4, 5], 5)
+        self.assertEqual(bcshape, [5, ])
+
+        bcshape = getBroadcastShape([2, 3, 5], 1)
+        self.assertEqual(bcshape, [1, ])
+        bcshape = getBroadcastShape([2, 3, 5], 5)
+        self.assertEqual(bcshape, [5, ])
+        bcshape = getBroadcastShape([2, 3, 5], 15)
+        self.assertEqual(bcshape, [3, 5])
+
+    def testJsonToArrayOnNoneCompoundArray(self):
+        # compound type
+        dt = np.dtype([("a", "i4"), ("b", "S5")])
+        shape = [1,]
+        data = None
+
+        arr = jsonToArray(shape, dt, data)
+
+        self.assertEqual(len(arr), 0)
+        self.assertEqual(arr.dtype, dt)
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/h5json_reader_test.py b/test/unit/h5json_reader_test.py
new file mode 100644
index 0000000..f49a86a
--- /dev/null
+++ b/test/unit/h5json_reader_test.py
@@ -0,0 +1,99 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import logging
+import numpy as np
+from h5json import Hdf5db
+from h5json.jsonstore.h5json_reader import H5JsonReader
+from h5json import selections
+
+
+class H5pyReaderTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(H5pyReaderTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.INFO)
+        handler = logging.FileHandler("./h5json_reader_test.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+
+    def testSimple(self):
+        filepath = "data/json/tall.json"
+        kwargs = {"app_logger": self.log}
+        with Hdf5db(**kwargs) as db:
+            h5_reader = H5JsonReader(filepath, **kwargs)
+            db.reader = h5_reader
+            root_id = db.getObjectIdByPath("/")
+            root_json = db.getObjectById(root_id)
+
+            root_attrs = root_json["attributes"]
+            self.assertEqual(len(root_attrs), 2)
+            self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+            root_links = root_json["links"]
+            self.assertEqual(len(root_links), 2)
+            self.assertEqual(list(root_links.keys()), ["g1", "g2"])
+            g1_link = root_links["g1"]
+            self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
+            g1_id = g1_link["id"]
+            self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+            dset_json = db.getObjectById(dset111_id)
+            dset_type = dset_json["type"]
+            self.assertEqual(dset_type["class"], "H5T_INTEGER")
+            self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 2)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
+            dset_shape = dset_json["shape"]
+            self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(dset_shape["dims"], [10, 10])
+            sel_all = selections.select((10, 10), ...)
+            arr = db.getDatasetValues(dset111_id, sel_all)
+            self.assertTrue(isinstance(arr, np.ndarray))
+            self.assertEqual(arr.shape, (10, 10))
+            for i in range(10):
+                for j in range(10):
+                    v = arr[i, j]
+                    self.assertEqual(v, i * j)
+
+            # try adding an attribute
+            db.createAttribute(dset111_id, "attr3", value=42)
+            dset_json = db.getObjectById(dset111_id)
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 3)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
+            attr3_json = dset_attrs["attr3"]
+            attr3_shape = attr3_json["shape"]
+            self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
+            attr3_type = attr3_json["type"]
+            self.assertEqual(attr3_type["class"], "H5T_INTEGER")
+            self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
+            attr3_value = attr3_json["value"]
+            self.assertEqual(attr3_value, 42)
+
+            db.close()
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py
new file mode 100644
index 0000000..0f1fb59
--- /dev/null
+++ b/test/unit/h5json_writer_test.py
@@ -0,0 +1,345 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import time
+import logging
+import numpy as np
+from h5json import Hdf5db
+from h5json.jsonstore.h5json_writer import H5JsonWriter
+from h5json.hdf5dtype import special_dtype, Reference
+from h5json import selections
+
+
+class H5JsonWriterTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(H5JsonWriterTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.DEBUG)
+        # create logger
+
+        handler = logging.FileHandler("./hdf5dbtest.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+        # self.log.propagate = False  # prevent log out going to stdout
+        self.log.info("init!")
+
+    def testSimple(self):
+
+        filepath = "test/unit/out/h5json_writer_testSimple.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+            db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
+            db.createAttribute(root_id, "attr2", 42)
+            g1_id = db.createGroup()
+            db.createHardLink(root_id, "g1", g1_id)
+            g2_id = db.createGroup()
+            db.createHardLink(root_id, "g2", g2_id)
+
+            g1_1_id = db.createGroup()
+            db.createHardLink(g1_id, "g1.1", g1_1_id)
+            dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+            arr = np.zeros((10, 10), dtype=np.int32)
+            for i in range(10):
+                for j in range(10):
+                    arr[i, j] = i * j
+            sel_all = selections.select((10, 10), ...)
+            db.setDatasetValues(dset_111_id, sel_all, arr)
+            db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
+            db.createSoftLink(g2_id, "slink", "somewhere")
+            db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+            db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+            db.flush()
+
+    def testNullSpaceAttribute(self):
+
+        filepath = "test/unit/out/h5json_writer_testNullSpaceAttribute.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+            db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
+            self.assertTrue("shape" in item)
+            shape_item = item["shape"]
+            self.assertTrue("class" in shape_item)
+            self.assertEqual(shape_item["class"], "H5S_NULL")
+            self.assertTrue(item["created"] > time.time() - 1.0)
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertEqual(value, None)
+
+    def testScalarAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testScalarAttribute.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+            dims = ()
+            value = 42
+            db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+            self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
+            self.assertEqual(item["value"], 42)
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            shape = item["shape"]
+            self.assertEqual(shape["class"], "H5S_SCALAR")
+
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+
+    def testFixedStringAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testFixedStringAttribute.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+            value = "Hello, world!"
+            db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["length"], 13)
+            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            ret_value = db.getAttributeValue(root_id, "A1")
+            self.assertEqual(ret_value, b'Hello, world!')
+
+    def testVlenAsciiAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testVlenAsciiAttribute.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+
+            value = b"Hello, world!"
+            dt = special_dtype(vlen=bytes)
+
+            # write the attribute
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            # read it back
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+
+    def testVlenUtf8Attribute(self):
+        filepath = "test/unit/out/h5json_writer_testVlenutf8Attribute.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+
+            value = b"Hello, world!"
+            dt = special_dtype(vlen=str)
+
+            # write the attribute
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            # read it back
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+
+    def testIntAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testIntAttribute.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+            value = [2, 3, 5, 7, 11]
+            db.createAttribute(root_id, "A1", value, dtype=np.int16)
+            item = db.getAttribute(root_id, "A1")
+            self.assertEqual(item["value"], [2, 3, 5, 7, 11])
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(item_shape["dims"], [5,])
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+
+    def testCreateReferenceAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testCreateReferenceAttribute.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
+            db.createHardLink(root_id, "DS1", dset_id)
+
+            dt = special_dtype(ref=Reference)
+
+            ds1_ref = "datasets/" + dset_id
+            value = [ds1_ref,]
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            item = db.getAttribute(root_id, "A1")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertTrue("shape" in attr)
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_REFERENCE")
+            self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
+            attr_value = item["value"]
+            self.assertEqual(len(attr_value), 1)
+            self.assertEqual(attr_value[0], ds1_ref)
+
+    def testCreateVlenReferenceAttribute(self):
+        filepath = "test/unit/out/h5json_writer_testVlenReferenceAttribute.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
+            db.createHardLink(root_id, "DS1", dset_id)
+            grp_id = db.createGroup()
+            db.createHardLink(root_id, "G1", grp_id)
+
+            dt_base = special_dtype(ref=Reference)
+            dt = special_dtype(vlen=dt_base)
+
+            ds1_ref = "datasets/" + dset_id
+            grp_ref = "groups/" + grp_id
+            ref_arr = np.zeros((2,), dtype=dt_base)
+            ref_arr[0] = ds1_ref
+            ref_arr[1] = grp_ref
+            vlen_arr = np.zeros((), dtype=dt)
+            vlen_arr[()] = ref_arr
+
+            db.createAttribute(root_id, "A1", vlen_arr)
+            item = db.getAttribute(root_id, "A1")
+
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_VLEN")
+            self.assertEqual(item_type["size"], "H5T_VARIABLE")
+            base_type = item_type["base"]
+            self.assertEqual(base_type["class"], "H5T_REFERENCE")
+            self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
+
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SCALAR")
+
+    def testCommittedType(self):
+        filepath = "test/unit/out/h5json_writer_testCommittedType.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+            dt = np.dtype("S15")
+
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+
+            item_type = item["type"]
+
+            self.assertEqual(item_type["class"], "H5T_STRING")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item_type["length"], 15)
+
+            # create an attribute using the committed type
+            db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], "hello world!")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_STRING")
+            self.assertEqual(attr_type["length"], 15)
+            self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+
+    def testCommittedCompoundType(self):
+        filepath = "test/unit/out/h5json_writer_testCommittedCompoundType.h5"
+
+        with Hdf5db(app_logger=self.log) as db:
+            db.writer = H5JsonWriter(filepath, app_logger=self.log)
+            root_id = db.getObjectIdByPath("/")
+
+            dt_str = special_dtype(vlen=str)
+            fields = []
+            fields.append(("field_1", np.dtype(">i8")))
+            fields.append(("field_2", ">f8"))
+            fields.append(("field_3", np.dtype("S15")))
+            fields.append(("field_4", dt_str))
+            dt = np.dtype(fields)
+
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+
+            item_type = item["type"]
+
+            self.assertEqual(item_type["class"], "H5T_COMPOUND")
+            fields = item_type["fields"]
+            self.assertEqual(len(fields), 4)
+
+            # create an attribute using the committed type
+            attr_value = (42, 3.14, "circle", "area = R^2 * PI")
+            db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], list(attr_value))
+            attr_shape = attr["shape"]
+            self.assertEqual(attr_shape["class"], "H5S_SCALAR")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_COMPOUND")
+
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertTrue(isinstance(value, np.ndarray))
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/h5py_reader_test.py b/test/unit/h5py_reader_test.py
new file mode 100644
index 0000000..7c11c4f
--- /dev/null
+++ b/test/unit/h5py_reader_test.py
@@ -0,0 +1,89 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+
+import logging
+from h5json import Hdf5db
+from h5json.h5pystore.h5py_reader import H5pyReader
+
+
+class H5pyReaderTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(H5pyReaderTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.INFO)
+        handler = logging.FileHandler("./hdf5dbtest.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+
+    def testSimple(self):
+        filepath = "data/hdf5/tall.h5"
+        kwargs = {"app_logger": self.log}
+        with Hdf5db(h5_reader=H5pyReader(filepath, **kwargs), **kwargs) as db:
+            root_id = db.getObjectIdByPath("/")
+            print("got root_id:", root_id)
+            root_json = db.getObjectById(root_id)
+
+            root_attrs = root_json["attributes"]
+            self.assertEqual(len(root_attrs), 2)
+            self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+            root_links = root_json["links"]
+            self.assertEqual(len(root_links), 2)
+            self.assertEqual(list(root_links.keys()), ["g1", "g2"])
+            g1_link = root_links["g1"]
+            self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
+            g1_id = g1_link["id"]
+            self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+            dset_json = db.getObjectById(dset111_id)
+            dset_type = dset_json["type"]
+            self.assertEqual(dset_type["class"], "H5T_INTEGER")
+            self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 2)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
+            dset_shape = dset_json["shape"]
+            self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(dset_shape["dims"], [10, 10])
+
+            # try adding an attribute
+            db.createAttribute(dset111_id, "attr3", value=42)
+            dset_json = db.getObjectById(dset111_id)
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 3)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
+            attr3_json = dset_attrs["attr3"]
+            attr3_shape = attr3_json["shape"]
+            self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
+            attr3_type = attr3_json["type"]
+            self.assertEqual(attr3_type["class"], "H5T_INTEGER")
+            self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
+            attr3_value = attr3_json["value"]
+            self.assertEqual(attr3_value, 42)
+
+            db.close()
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/h5py_writer_test.py b/test/unit/h5py_writer_test.py
new file mode 100644
index 0000000..3ff91be
--- /dev/null
+++ b/test/unit/h5py_writer_test.py
@@ -0,0 +1,590 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import time
+import logging
+import os
+
+import h5py
+import numpy as np
+from h5json import Hdf5db
+from h5json.jsonstore.h5json_reader import H5JsonReader
+from h5json.h5pystore.h5py_writer import H5pyWriter
+from h5json.hdf5dtype import special_dtype, Reference
+from h5json import selections
+
+
+class H5pyWriterTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(H5pyWriterTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.DEBUG)
+        # create logger
+
+        handler = logging.FileHandler("./h5pywriterbtest.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+        # self.log.propagate = False  # prevent log out going to stdout
+        self.log.info("init!")
+
+    def testSimple(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testSimple.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        self.assertEqual(db.getObjectIdByPath("/"), root_id)
+        db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
+        db.createAttribute(root_id, "attr2", 42)
+        g1_id = db.createGroup()
+        db.createHardLink(root_id, "g1", g1_id)
+        db.createAttribute(g1_id, "a1", "hello")
+        db.close()
+
+        # open file with h5py and verify changes
+        with h5py.File(filepath) as f:
+            self.assertTrue("attr1", f.attrs)
+            self.assertTrue("attr2", f.attrs)
+            self.assertEqual(len(f), 1)
+            self.assertTrue("g1" in f)
+            g1 = f["g1"]
+            self.assertTrue("a1" in g1.attrs)
+            self.assertEqual(len(g1), 0)
+
+        db.open()
+        g2_id = db.createGroup()
+        db.createHardLink(root_id, "g2", g2_id)
+
+        g1_1_id = db.createGroup()
+        db.createHardLink(g1_id, "g1.1", g1_1_id)
+        dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+        arr = np.zeros((10, 10), dtype=np.int32)
+        for i in range(10):
+            for j in range(10):
+                arr[i, j] = i * j
+        sel_all = selections.select((10, 10), ...)
+        db.setDatasetValues(dset_111_id, sel_all, arr)
+        db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
+        db.createSoftLink(g2_id, "slink", "somewhere")
+        db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+        db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+        db.close()
+
+        # open file with h5py and verify changes
+        with h5py.File(filepath) as f:
+            self.assertTrue("attr1", f.attrs)
+            self.assertTrue("attr2", f.attrs)
+            self.assertTrue("g1" in f)
+            g1 = f["g1"]
+            self.assertTrue("a1" in g1.attrs)
+            self.assertTrue("g1.1" in g1)
+            g11 = g1["g1.1"]
+            self.assertTrue("dset1.1.1" in g11)
+            dset = g11["dset1.1.1"]
+            self.assertEqual(dset.shape, (10, 10))
+            for i in range(10):
+                for j in range(10):
+                    self.assertEqual(dset[i, j], i * j)
+            self.assertTrue("g2" in f)
+            g2 = f["g2"]
+            self.assertTrue("extlink" in g2)
+            self.assertTrue("slink" in g2)
+
+        db.open()
+        db.createAttribute(g1_id, "a2", "bye-bye")
+        db.close()
+
+        with h5py.File(filepath) as f:
+            g1 = f["g1"]
+            self.assertEqual(len(g1.attrs), 2)
+            self.assertTrue("a1" in g1.attrs)
+            self.assertTrue("a2" in g1.attrs)
+
+        db.open()
+        g21 = db.createGroup()
+        db.createHardLink(g2_id, "g2.1", g21)
+        db.close()
+
+        with h5py.File(filepath) as f:
+            g2 = f["g2"]
+            self.assertTrue("g2.1" in g2)
+
+        db.open()
+        sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
+        arr = np.zeros((), dtype=np.int32)
+        arr[()] = 42
+        db.setDatasetValues(dset_111_id, sel, arr)
+        db.close()
+
+        with h5py.File(filepath) as f:
+            dset = f["/g1/g1.1/dset1.1.1"]
+            for i in range(10):
+                for j in range(10):
+                    if i == 4 and j == 4:
+                        # this is the one element that was updated
+                        expected = 42
+                    else:
+                        expected = i * j
+                    self.assertEqual(dset[i, j], expected)
+
+    def testNullSpaceAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testNullSpaceAttribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
+        item = db.getAttribute(root_id, "A1")
+        self.assertTrue("shape" in item)
+        shape_item = item["shape"]
+        self.assertTrue("class" in shape_item)
+        self.assertEqual(shape_item["class"], "H5S_NULL")
+        self.assertTrue(item["created"] > time.time() - 1.0)
+        value = db.getAttributeValue(root_id, "A1")
+        self.assertEqual(value, None)
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            self.assertEqual(f.attrs["A1"], h5py.Empty(dtype=np.int32))
+
+    def testScalarAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testNullScalarAttribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dims = ()
+        value = 42
+        db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
+        self.assertEqual(item["value"], 42)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        shape = item["shape"]
+        self.assertEqual(shape["class"], "H5S_SCALAR")
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, np.int32))
+            self.assertEqual(a1, 42)
+
+    def testFixedStringAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testFixedStringAttribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        value = "Hello, world!"
+        db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(item_type["length"], 13)
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, bytes))
+            self.assertEqual(a1, b'Hello, world!')
+
+    def testVlenAsciiAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testVlenAsciiAttribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        value = b"Hello, world!"
+
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dt = special_dtype(vlen=bytes)
+        # write the attribute
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        # read it back
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+        self.assertEqual(item_type["length"], "H5T_VARIABLE")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item["value"], "Hello, world!")
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, str))
+            self.assertEqual(a1, value.decode("ascii"))
+
+    def testVlenUtf8Attribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testVlenUtf8Attribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        value = "one: \u4e00"
+
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dt = special_dtype(vlen=str)
+        # write the attribute
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        # read it back
+        item = db.getAttribute(root_id, "A1")
+        shape_json = item["shape"]
+        self.assertEqual(shape_json["class"], "H5S_SCALAR")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+        self.assertEqual(item_type["length"], "H5T_VARIABLE")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
+        self.assertEqual(item["value"], value)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, str))
+            self.assertEqual(a1, value)
+
+    def testIntAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testIntAttribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        value = [2, 3, 5, 7, 11]
+
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        db.createAttribute(root_id, "A1", value, dtype=np.int16)
+        item = db.getAttribute(root_id, "A1")
+        self.assertEqual(item["value"], [2, 3, 5, 7, 11])
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        item_shape = item["shape"]
+        self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+        self.assertEqual(item_shape["dims"], [5,])
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_INTEGER")
+        self.assertEqual(item_type["base"], "H5T_STD_I16LE")
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertTrue(isinstance(a1, np.ndarray))
+            self.assertEqual(a1.shape, (5,))
+            for i in range(5):
+                self.assertEqual(a1[i], value[i])
+
+    def testCreateReferenceAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testCreateReferenceAttribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dset_id = db.createDataset(shape=(), dtype=np.int32)
+        db.createHardLink(root_id, "DS1", dset_id)
+        dt = special_dtype(ref=Reference)
+        ds1_ref = "datasets/" + dset_id
+        value = [ds1_ref,]
+        db.createAttribute(root_id, "A1", value, dtype=dt)
+        attr = db.getAttribute(root_id, "A1")
+        self.assertTrue("shape" in attr)
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_REFERENCE")
+        self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
+        attr_value = db.getAttributeValue(root_id, "A1")
+        self.assertEqual(len(attr_value), 1)
+        self.assertEqual(attr_value[0], ds1_ref.encode('ascii'))
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            obj_ref = a1[0]
+            obj = f[obj_ref]
+            self.assertEqual(obj.name, "/DS1")
+
+    def testCreateVlenReferenceAttribute(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testVlenReferenceAttribute.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dset_id = db.createDataset(shape=(), dtype=np.int32)
+        db.createHardLink(root_id, "DS1", dset_id)
+        grp_id = db.createGroup()
+        db.createHardLink(root_id, "G1", grp_id)
+        dt_base = special_dtype(ref=Reference)
+        dt = special_dtype(vlen=dt_base)
+        ds1_ref = "datasets/" + dset_id
+        grp_ref = "groups/" + grp_id
+        ref_arr = np.zeros((2,), dtype=dt_base)
+        ref_arr[0] = ds1_ref
+        ref_arr[1] = grp_ref
+        vlen_arr = np.zeros((), dtype=dt)
+        vlen_arr[()] = ref_arr
+        db.createAttribute(root_id, "A1", vlen_arr)
+        item = db.getAttribute(root_id, "A1")
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_VLEN")
+        self.assertEqual(item_type["size"], "H5T_VARIABLE")
+        base_type = item_type["base"]
+        self.assertEqual(base_type["class"], "H5T_REFERENCE")
+        self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
+        item_shape = item["shape"]
+        self.assertEqual(item_shape["class"], "H5S_SCALAR")
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("DS1" in f)
+            ds1 = f["DS1"]
+            self.assertTrue(ds1)
+            self.assertTrue("G1" in f)
+            g1 = f["G1"]
+            self.assertTrue(g1)
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            ref_obj = f[a1[0]]
+            self.assertEqual(ref_obj.name, "/DS1")
+
+    def testCommittedType(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testCommittedType.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+        dt = np.dtype("S15")
+
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        ctype_id = db.createCommittedType(dt)
+        db.createHardLink(root_id, "ctype", ctype_id)
+        item = db.getObjectById(ctype_id)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.createHardLink(root_id, "T1", ctype_id)
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_STRING")
+        self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+        self.assertEqual(item_type["length"], 15)
+        # create an attribute using the committed type
+        db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertEqual(attr["value"], "hello world!")
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_STRING")
+        self.assertEqual(attr_type["length"], 15)
+        self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("T1" in f)
+            t1 = f["T1"]
+            self.assertTrue(isinstance(t1, h5py.Datatype))
+            self.assertEqual(t1.dtype, dt)
+
+            self.assertTrue("A1" in f.attrs)
+            a1 = f.attrs["A1"]
+            self.assertEqual(a1, b"hello world!")
+
+    def testCommittedCompoundType(self):
+
+        filepath = "test/unit/out/h5py_writer_test_testCommittedCompoundType.h5"
+        if os.path.isfile(filepath):
+            os.remove(filepath)  # cleanup any previous run
+
+        db = Hdf5db(app_logger=self.log)
+        db.writer = H5pyWriter(filepath, no_data=False)
+        root_id = db.open()
+        dt_str = special_dtype(vlen=str)
+        fields = []
+        fields.append(("field_1", np.dtype(">i8")))
+        fields.append(("field_2", np.dtype(">f8")))
+        fields.append(("field_3", np.dtype("S15")))
+        fields.append(("field_4", dt_str))
+        dt = np.dtype(fields)
+        ctype_id = db.createCommittedType(dt)
+        db.createHardLink(root_id, "ctype", ctype_id)
+        item = db.getObjectById(ctype_id)
+        now = int(time.time())
+        self.assertTrue(item["created"] > now - 1)
+        db.createHardLink(root_id, "T1", ctype_id)
+        item_type = item["type"]
+        self.assertEqual(item_type["class"], "H5T_COMPOUND")
+        fields = item_type["fields"]
+        self.assertEqual(len(fields), 4)
+        # create an attribute using the committed type
+        attr_value = (42, 3.14, "circle", "area = R^2 * PI")
+        db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
+        attr = db.getAttribute(root_id, "A1")
+        self.assertEqual(attr["value"], list(attr_value))
+        attr_shape = attr["shape"]
+        self.assertEqual(attr_shape["class"], "H5S_SCALAR")
+        attr_type = attr["type"]
+        self.assertEqual(attr_type["class"], "H5T_COMPOUND")
+        arr = db.getAttributeValue(root_id, "A1")
+        self.assertTrue(isinstance(arr, np.ndarray))
+        db.close()
+
+        with h5py.File(filepath) as f:
+            self.assertTrue("T1" in f)
+            t1 = f["T1"]
+            self.assertTrue(isinstance(t1, h5py.Datatype))
+            self.assertEqual(len(t1.dtype), 4)
+            sub_dt = t1.dtype["field_1"]
+            self.assertEqual(sub_dt, np.dtype(">i8"))
+            sub_dt = t1.dtype["field_2"]
+            self.assertEqual(sub_dt, np.dtype(">f8"))
+            sub_dt = t1.dtype["field_3"]
+            self.assertEqual(sub_dt, np.dtype("S15"))
+            sub_dt = t1.dtype["field_4"]
+            self.assertEqual(sub_dt, h5py.special_dtype(vlen=str))
+
+    def testReaderWithUpdate(self):
+
+        file_in = "data/json/tall.json"
+        file_out = "test/unit/out/h5py_writer_test_testReaderWithUpdate.h5"
+        if os.path.isfile(file_out):
+            os.remove(file_out)  # cleanup any previous run
+
+        db = Hdf5db(app_logger=self.log)
+        db.reader = H5JsonReader(file_in)
+        db.writer = H5pyWriter(file_out)
+        db.open()
+        # close should create everything the json reader read to the output file
+        db.close()
+
+        with h5py.File(file_out) as f:
+            self.assertTrue("/g1/g1.1/dset1.1.1" in f)
+            dset111 = f["/g1/g1.1/dset1.1.1"]
+            self.assertEqual(len(dset111.attrs), 2)
+
+        db.open()
+        dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+        db.createAttribute(dset111_id, "attr3", "hello")
+        db.close()
+
+        with h5py.File(file_out) as f:
+            self.assertTrue("/g1/g1.1/dset1.1.1" in f)
+            dset111 = f["/g1/g1.1/dset1.1.1"]
+            self.assertEqual(len(dset111.attrs), 3)
+            self.assertEqual(dset111.attrs["attr3"], b"hello")
+
+        db.open()
+        db.createAttribute(dset111_id, "attr3", "bye-bye")
+        db.close()
+
+        with h5py.File(file_out) as f:
+            self.assertTrue("/g1/g1.1/dset1.1.1" in f)
+            dset111 = f["/g1/g1.1/dset1.1.1"]
+            self.assertEqual(len(dset111.attrs), 3)
+            self.assertEqual(dset111.attrs["attr3"], b"bye-bye")
+            g1 = f["g1"]
+
+        db.open()
+        # create a new group
+        g13_id = db.createGroup()
+        g1_id = db.getObjectIdByPath("/g1")
+        db.createHardLink(g1_id, "g1.3", g13_id)
+        db.close()
+
+        with h5py.File(file_out) as f:
+            g1 = f["g1"]
+            self.assertEqual(len(g1), 3)
+            self.assertTrue("g1.3" in g1)
+
+        db.open()
+        # create a new dataset
+        dset_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+        db.createHardLink(g1_id, "DS1", dset_id)
+        db.close()
+
+        with h5py.File(file_out) as f:
+            g1 = f["g1"]
+            self.assertTrue("DS1" in g1)
+            ds1 = g1["DS1"]
+            self.assertEqual(ds1.shape, (10, 10))
+
+        db.open()
+        arr = np.asarray(range(10), dtype=np.int32)
+        sel = selections.select((10, 10), (slice(5, 6), slice(0, 10)))
+        db.setDatasetValues(dset_id, sel, arr)
+        db.close()
+
+        with h5py.File(file_out) as f:
+            ds1 = f["/g1/DS1"]
+            data = ds1[:, :]
+            for i in range(10):
+                for j in range(10):
+                    if i == 5:
+                        self.assertEqual(data[i, j], j)
+                    else:
+                        self.assertEqual(data[i, j], 0)
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py
index 6a310c6..cbd7c87 100755
--- a/test/unit/hdf5db_test.py
+++ b/test/unit/hdf5db_test.py
@@ -10,42 +10,13 @@
 # request a copy from help@hdfgroup.org.                                     #
 ##############################################################################
 import unittest
-import os
 import time
-import errno
-import os.path as op
-import stat
 import logging
-import shutil
+import numpy as np
 from h5json import Hdf5db
-
-
-UUID_LEN = 36  # length for uuid strings
-
-
-def getFile(name, tgt, ro=False):
-    src = "data/hdf5/" + name
-    logging.info("copying file to this directory: " + src)
-
-    filepath = "./out/" + tgt
-
-    if op.isfile(filepath):
-        # make sure it's writable, before we copy over it
-        os.chmod(filepath, stat.S_IWRITE | stat.S_IREAD)
-    shutil.copyfile(src, filepath)
-    if ro:
-        logging.info("make read-only")
-        os.chmod(filepath, stat.S_IREAD)
-    return filepath
-
-
-def removeFile(name):
-    try:
-        os.stat(name)
-    except OSError:
-        return
-        # file does not exist
-    os.remove(name)
+from h5json import selections
+from h5json.objid import isRootObjId, isValidUuid, isSchema2Id
+from h5json.hdf5dtype import special_dtype, Reference
 
 
 class Hdf5dbTest(unittest.TestCase):
@@ -59,7 +30,7 @@ def __init__(self, *args, **kwargs):
         else:
             lhStdout = None
 
-        self.log.setLevel(logging.INFO)
+        self.log.setLevel(logging.DEBUG)
         # create logger
 
         handler = logging.FileHandler("./hdf5dbtest.log")
@@ -71,778 +42,218 @@ def __init__(self, *args, **kwargs):
         # self.log.propagate = False  # prevent log out going to stdout
         self.log.info("init!")
 
-        # create directory for test output files
-        if not os.path.exists("./out"):
-            os.makedirs("./out")
-
-    def testInvalidPath(self):
-        filepath = "/tmp/thisisnotafile.h5"
-        try:
-            with Hdf5db(filepath, app_logger=self.log) as db:
-                self.log.error(f"Unexpected Hdf5db ref: {db}")
-                self.assertTrue(False)  # shouldn't get here
-        except IOError as e:
-            self.assertEqual(e.errno, errno.ENXIO)
-            self.assertEqual(e.strerror, "file not found")
-
-    def testInvalidFile(self):
-        filepath = getFile("notahdf5file.h5", "notahdf5file.h5")
-        try:
-            with Hdf5db(filepath, app_logger=self.log) as db:
-                self.log.error(f"Unexpected Hdf5db ref: {db}")
-                self.assertTrue(False)  # shouldn't get here
-        except IOError as e:
-            self.assertEqual(e.errno, errno.EINVAL)
-            self.assertEqual(e.strerror, "not an HDF5 file")
-
-    def testGetUUIDByPath(self):
-        # get test file
-        g1Uuid = None
-        filepath = getFile("tall.h5", "getuuidbypath.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            g1Uuid = db.getUUIDByPath("/g1")
-            self.assertEqual(len(g1Uuid), UUID_LEN)
-            obj = db.getObjByPath("/g1")
-            self.assertEqual(obj.name, "/g1")
-            for name in obj:
-                g = obj[name]
-                self.log.debug(f"got obj: {g}")
-            g1links = db.getLinkItems(g1Uuid)
-            self.assertEqual(len(g1links), 2)
-            for item in g1links:
-                self.assertEqual(len(item["id"]), UUID_LEN)
-
-        # end of with will close file
-        # open again and verify we can get obj by name
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            obj = db.getGroupObjByUuid(g1Uuid)
-            g1 = db.getObjByPath("/g1")
-            self.assertEqual(obj, g1)
-
-    def testGetCounts(self):
-        filepath = getFile("tall.h5", "testgetcounts_tall.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            cnt = db.getNumberOfGroups()
-            self.assertEqual(cnt, 6)
-            cnt = db.getNumberOfDatasets()
-            self.assertEqual(cnt, 4)
-            cnt = db.getNumberOfDatatypes()
-            self.assertEqual(cnt, 0)
-
-        filepath = getFile("empty.h5", "testgetcounts_empty.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            cnt = db.getNumberOfGroups()
-            self.assertEqual(cnt, 1)
-            cnt = db.getNumberOfDatasets()
-            self.assertEqual(cnt, 0)
-            cnt = db.getNumberOfDatatypes()
-            self.assertEqual(cnt, 0)
-
-    def testGroupOperations(self):
-        # get test file
-        filepath = getFile("tall.h5", "tall_del_g11.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootuuid = db.getUUIDByPath("/")
-            root = db.getGroupObjByUuid(rootuuid)
-            self.assertEqual("/", root.name)
-            rootLinks = db.getLinkItems(rootuuid)
-            self.assertEqual(len(rootLinks), 2)
-            g1uuid = db.getUUIDByPath("/g1")
-            self.assertEqual(len(g1uuid), UUID_LEN)
-            g1Links = db.getLinkItems(g1uuid)
-            self.assertEqual(len(g1Links), 2)
-            g11uuid = db.getUUIDByPath("/g1/g1.1")
-            db.deleteObjectByUuid("group", g11uuid)
-
-    def testCreateGroup(self):
-        # get test file
-        filepath = getFile("tall.h5", "tall_newgrp.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            numRootChildren = len(db.getLinkItems(rootUuid))
-            self.assertEqual(numRootChildren, 2)
-            newGrpUuid = db.createGroup()
-            newGrp = db.getGroupObjByUuid(newGrpUuid)
-            self.assertNotEqual(newGrp, None)
-            db.linkObject(rootUuid, newGrpUuid, "g3")
-            numRootChildren = len(db.getLinkItems(rootUuid))
-            self.assertEqual(numRootChildren, 3)
-            # verify linkObject can be called idempotent-ly
-            db.linkObject(rootUuid, newGrpUuid, "g3")
-
-    def testGetLinkItemsBatch(self):
-        # get test file
-        filepath = getFile("group100.h5", "getlinkitemsbatch.h5")
-        marker = None
-        count = 0
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            while True:
-                # get items 13 at a time
-                batch = db.getLinkItems(rootUuid, marker=marker, limit=13)
-                if len(batch) == 0:
-                    break  # done!
-                count += len(batch)
-                lastItem = batch[len(batch) - 1]
-                marker = lastItem["title"]
-        self.assertEqual(count, 100)
-
-    def testGetItemHardLink(self):
-        filepath = getFile("tall.h5", "getitemhardlink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g1/g1.1")
-            item = db.getLinkItemByUuid(grpUuid, "dset1.1.1")
-            self.assertTrue("id" in item)
-            self.assertEqual(item["title"], "dset1.1.1")
-            self.assertEqual(item["class"], "H5L_TYPE_HARD")
-            self.assertEqual(item["collection"], "datasets")
-            self.assertTrue("target" not in item)
-            self.assertTrue("mtime" in item)
-            self.assertTrue("ctime" in item)
-
-    def testGetItemSoftLink(self):
-        filepath = getFile("tall.h5", "getitemsoftlink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g1/g1.2/g1.2.1")
-            item = db.getLinkItemByUuid(grpUuid, "slink")
-            self.assertTrue("id" not in item)
-            self.assertEqual(item["title"], "slink")
-            self.assertEqual(item["class"], "H5L_TYPE_SOFT")
-            self.assertEqual(item["h5path"], "somevalue")
-            self.assertTrue("mtime" in item)
-            self.assertTrue("ctime" in item)
-
-    def testGetItemExternalLink(self):
-        filepath = getFile("tall_with_udlink.h5", "getitemexternallink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g1/g1.2")
-            item = db.getLinkItemByUuid(grpUuid, "extlink")
-            self.assertTrue("uuid" not in item)
-            self.assertEqual(item["title"], "extlink")
-            self.assertEqual(item["class"], "H5L_TYPE_EXTERNAL")
-            self.assertEqual(item["h5path"], "somepath")
-            self.assertEqual(item["file"], "somefile")
-            self.assertTrue("mtime" in item)
-            self.assertTrue("ctime" in item)
-
-    def testGetItemUDLink(self):
-        filepath = getFile("tall_with_udlink.h5", "getitemudlink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g2")
-            item = db.getLinkItemByUuid(grpUuid, "udlink")
-            self.assertTrue("uuid" not in item)
-            self.assertEqual(item["title"], "udlink")
-            self.assertEqual(item["class"], "H5L_TYPE_USER_DEFINED")
-            self.assertTrue("h5path" not in item)
-            self.assertTrue("file" not in item)
-            self.assertTrue("mtime" in item)
-            self.assertTrue("ctime" in item)
-
-    def testGetNumLinks(self):
-        filepath = getFile("tall.h5", "getnumlinks.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            g1 = db.getObjByPath("/g1")
-            numLinks = db.getNumLinksToObject(g1)
-            self.assertEqual(numLinks, 1)
-
-    def testGetLinks(self):
-        g12_links = ("extlink", "g1.2.1")
-        hardLink = None
-        externalLink = None
-        filepath = getFile("tall_with_udlink.h5", "getlinks.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            grpUuid = db.getUUIDByPath("/g1/g1.2")
-            items = db.getLinkItems(grpUuid)
-            self.assertEqual(len(items), 2)
-            for item in items:
-                self.assertTrue(item["title"] in g12_links)
-                if item["class"] == "H5L_TYPE_HARD":
-                    hardLink = item
-                elif item["class"] == "H5L_TYPE_EXTERNAL":
-                    externalLink = item
-        self.assertEqual(hardLink["collection"], "groups")
-        self.assertTrue("id" in hardLink)
-        self.assertTrue("id" not in externalLink)
-        self.assertEqual(externalLink["h5path"], "somepath")
-        self.assertEqual(externalLink["file"], "somefile")
-
-    def testDeleteLink(self):
-        # get test file
-        filepath = getFile("tall.h5", "deletelink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            numRootChildren = len(db.getLinkItems(rootUuid))
-            self.assertEqual(numRootChildren, 2)
-            db.unlinkItem(rootUuid, "g2")
-            numRootChildren = len(db.getLinkItems(rootUuid))
-            self.assertEqual(numRootChildren, 1)
-
-    def testDeleteUDLink(self):
-        # get test file
-        filepath = getFile("tall_with_udlink.h5", "deleteudlink.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            g2Uuid = db.getUUIDByPath("/g2")
-            numG2Children = len(db.getLinkItems(g2Uuid))
-            self.assertEqual(numG2Children, 3)
-            got_exception = False
-            try:
-                db.unlinkItem(g2Uuid, "udlink")
-            except IOError as ioe:
-                got_exception = True
-                self.assertEqual(ioe.errno, errno.EPERM)
-            self.assertTrue(got_exception)
-            numG2Children = len(db.getLinkItems(g2Uuid))
-            self.assertEqual(numG2Children, 3)
-
-    def testReadOnlyGetUUID(self):
-        # get test file
-        filepath = getFile("tall.h5", "readonlygetuuid.h5", ro=True)
-        # remove db file!
-        removeFile("./out/." + "readonlygetuuid.h5")
-        g1Uuid = None
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            g1Uuid = db.getUUIDByPath("/g1")
-            self.assertEqual(len(g1Uuid), UUID_LEN)
-            obj = db.getObjByPath("/g1")
-            self.assertEqual(obj.name, "/g1")
-
-        # end of with will close file
-        # open again and verify we can get obj by name
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            obj = db.getGroupObjByUuid(g1Uuid)
-            g1 = db.getObjByPath("/g1")
-            self.assertEqual(obj, g1)
-            g1links = db.getLinkItems(g1Uuid)
-            self.assertEqual(len(g1links), 2)
-            for item in g1links:
-                self.assertEqual(len(item["id"]), UUID_LEN)
-
-    def testReadDataset(self):
-        filepath = getFile("tall.h5", "readdataset.h5")
-        d111_values = None
-        d112_values = None
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            d111Uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            self.assertEqual(len(d111Uuid), UUID_LEN)
-            d111_values = db.getDatasetValuesByUuid(d111Uuid)
-            self.assertTrue(type(d111_values) is list)
-            self.assertEqual(len(d111_values), 10)
-            for i in range(10):
-                arr = d111_values[i]
-                self.assertEqual(len(arr), 10)
-                for j in range(10):
-                    self.assertEqual(arr[j], i * j)
-
-            d112Uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.2")
-            self.assertEqual(len(d112Uuid), UUID_LEN)
-            d112_values = db.getDatasetValuesByUuid(d112Uuid)
-            self.assertTrue(type(d112_values) is list)
-            self.assertEqual(len(d112_values), 20)
-            for i in range(20):
-                self.assertEqual(d112_values[i], i)
-
-    def testReadDatasetBinary(self):
-        filepath = getFile("tall.h5", "readdatasetbinary.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            d111Uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            self.assertEqual(len(d111Uuid), UUID_LEN)
-            d111_data = db.getDatasetValuesByUuid(d111Uuid, format="binary")
-            self.assertTrue(type(d111_data) is bytes)
-            self.assertEqual(len(d111_data), 400)  # 10x10x(4 byte type)
-
-            d112Uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.2")
-            self.assertEqual(len(d112Uuid), UUID_LEN)
-            d112_data = db.getDatasetValuesByUuid(d112Uuid, format="binary")
-            self.assertEqual(len(d112_data), 80)  # 20x(4 byte type)
-
-    def testReadCompoundDataset(self):
-        filepath = getFile("compound.h5", "readcompound.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/dset")
-            self.assertEqual(len(dset_uuid), UUID_LEN)
-            dset_values = db.getDatasetValuesByUuid(dset_uuid)
-
-            self.assertEqual(len(dset_values), 72)
-            elem = dset_values[0]
-            self.assertEqual(elem[0], 24)
-            self.assertEqual(elem[1], "13:53")
-            self.assertEqual(elem[2], 63)
-            self.assertEqual(elem[3], 29.88)
-            self.assertEqual(elem[4], "SE 10")
-
-    def testReadDatasetCreationProp(self):
-        filepath = getFile("compound.h5", "readdatasetcreationprop.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/dset")
-            self.assertEqual(len(dset_uuid), UUID_LEN)
-            dset_item = db.getDatasetItemByUuid(dset_uuid)
-            self.assertTrue("creationProperties" in dset_item)
-            creationProp = dset_item["creationProperties"]
-            self.assertTrue("fillValue" in creationProp)
-            fillValue = creationProp["fillValue"]
-
-            self.assertEqual(fillValue[0], 999)
-            self.assertEqual(fillValue[1], "99:90")
-            self.assertEqual(fillValue[2], 999)
-            self.assertEqual(fillValue[3], 999.0)
-            self.assertEqual(fillValue[4], "N")
-
-    def testCreateScalarDataset(self):
-        creation_props = {
-            "allocTime": "H5D_ALLOC_TIME_LATE",
-            "fillTime": "H5D_FILL_TIME_IFSET",
-            "fillValue": "",
-            "layout": {"class": "H5D_CONTIGUOUS"},
-        }
-        datatype = {
-            "charSet": "H5T_CSET_ASCII",
-            "class": "H5T_STRING",
-            "length": 1,
-            "strPad": "H5T_STR_NULLPAD",
-        }
-        filepath = getFile("empty.h5", "createscalardataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dims = ()  # if no space in body, default to scalar
-            max_shape = None
-
-            db.createDataset(
-                datatype, dims, max_shape=max_shape, creation_props=creation_props
-            )
-
-    def testCreate1dDataset(self):
-        datatype = "H5T_STD_I64LE"
-        dims = (10,)
-        filepath = getFile("empty.h5", "create1ddataset.h5")
-        dset_uuid = None
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rsp = db.createDataset(datatype, dims)
-
-            dset_uuid = rsp["id"]
-            item = db.getDatasetItemByUuid(dset_uuid)
-            self.assertEqual(item["attributeCount"], 0)
-            type_item = item["type"]
-            self.assertEqual(type_item["class"], "H5T_INTEGER")
-            self.assertEqual(type_item["base"], "H5T_STD_I64LE")
-            shape_item = item["shape"]
-            self.assertEqual(shape_item["class"], "H5S_SIMPLE")
-            self.assertEqual(shape_item["dims"], (10,))
-
-    def testCreate2dExtendableDataset(self):
-        datatype = "H5T_STD_I64LE"
-        dims = (10, 10)
-        max_shape = (None, 10)
-        filepath = getFile("empty.h5", "create2dextendabledataset.h5")
-        dset_uuid = None
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rsp = db.createDataset(datatype, dims, max_shape=max_shape)
-            dset_uuid = rsp["id"]
-            item = db.getDatasetItemByUuid(dset_uuid)
-            self.assertEqual(item["attributeCount"], 0)
-            type_item = item["type"]
-            self.assertEqual(type_item["class"], "H5T_INTEGER")
-            self.assertEqual(type_item["base"], "H5T_STD_I64LE")
-            shape_item = item["shape"]
-            self.assertEqual(shape_item["class"], "H5S_SIMPLE")
-            self.assertEqual(shape_item["dims"], (10, 10))
-            self.assertTrue("maxdims" in shape_item)
-            self.assertEqual(shape_item["maxdims"], [0, 10])
-
-    def testCreateCommittedTypeDataset(self):
-        filepath = getFile("empty.h5", "createcommittedtypedataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            self.assertTrue(len(root_uuid) >= 36)
-
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": 15,
-            }
-            item = db.createCommittedType(datatype)
-            type_uuid = item["id"]
-
-            dims = ()  # if no space in body, default to scalar
-            rsp = db.createDataset(type_uuid, dims, max_shape=None, creation_props=None)
-            dset_uuid = rsp["id"]
-            item = db.getDatasetItemByUuid(dset_uuid)
-            type_item = item["type"]
-            self.assertTrue("uuid" in type_item)
-            self.assertEqual(type_item["uuid"], type_uuid)
-
-    def testCreateCommittedCompoundTypeDataset(self):
-        filepath = getFile("empty.h5", "createcommittedcompoundtypedataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            self.assertTrue(len(root_uuid) >= 36)
-
-            datatype = {"class": "H5T_COMPOUND", "fields": []}
-
-            type_fields = []
-            type_fields.append({"name": "field_1", "type": "H5T_STD_I64BE"})
-            type_fields.append({"name": "field_2", "type": "H5T_IEEE_F64BE"})
-
-            datatype["fields"] = type_fields
-
-            creation_props = {"fillValue": [0, 0.0]}
-
-            item = db.createCommittedType(datatype)
-            type_uuid = item["id"]
-
-            dims = ()  # if no space in body, default to scalar
-            rsp = db.createDataset(
-                type_uuid, dims, max_shape=None, creation_props=creation_props
-            )
-            dset_uuid = rsp["id"]
-            item = db.getDatasetItemByUuid(dset_uuid)
-            type_item = item["type"]
-            self.assertTrue("uuid" in type_item)
-            self.assertEqual(type_item["uuid"], type_uuid)
-
-    def testReadZeroDimDataset(self):
-        filepath = getFile("zerodim.h5", "readzerodeimdataset.h5")
-
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dsetUuid = db.getUUIDByPath("/dset")
-            self.assertEqual(len(dsetUuid), UUID_LEN)
-            dset_value = db.getDatasetValuesByUuid(dsetUuid)
-            self.assertEqual(dset_value, 42)
-
-    def testReadNullSpaceDataset(self):
-        filepath = getFile("null_space_dset.h5", "readnullspacedataset.h5")
-
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dsetUuid = db.getUUIDByPath("/DS1")
-            self.assertEqual(len(dsetUuid), UUID_LEN)
-            obj = db.getDatasetObjByUuid(dsetUuid)
-            shape_item = db.getShapeItemByDsetObj(obj)
-            self.assertTrue("class" in shape_item)
-            self.assertEqual(shape_item["class"], "H5S_NULL")
-
-    def testReadScalarSpaceArrayDataset(self):
-        filepath = getFile("scalar_array_dset.h5", "readscalarspacearraydataset.h5")
-
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dsetUuid = db.getUUIDByPath("/DS1")
-            self.assertEqual(len(dsetUuid), UUID_LEN)
-            obj = db.getDatasetObjByUuid(dsetUuid)
-            shape_item = db.getShapeItemByDsetObj(obj)
-            self.assertTrue("class" in shape_item)
-            self.assertEqual(shape_item["class"], "H5S_SCALAR")
-
-    def testReadNullSpaceAttribute(self):
-        filepath = getFile("null_space_attr.h5", "readnullspaceattr.h5")
+    def testGroup(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            self.assertTrue(isSchema2Id(root_id))
+            self.assertTrue(isRootObjId(root_id))
+
+            g1_id = db.createGroup()
+            self.assertTrue(isSchema2Id(g1_id))
+            self.assertFalse(isRootObjId(g1_id))
+            self.assertTrue(isValidUuid(g1_id, obj_class="groups"))
+            db.createHardLink(root_id, "g1", g1_id)
+
+            g2_id = db.createGroup()
+            self.assertTrue(isSchema2Id(g2_id))
+            self.assertFalse(isRootObjId(g2_id))
+            self.assertTrue(isValidUuid(g2_id, obj_class="groups"))
+            db.createHardLink(root_id, "g2", g2_id)
+
+            g1_1_id = db.createGroup()
+            self.assertTrue(isSchema2Id(g1_1_id))
+            self.assertFalse(isRootObjId(g1_1_id))
+            self.assertTrue(isValidUuid(g1_1_id, obj_class="groups"))
+            db.createHardLink(g1_id, "g1.1", g1_1_id)
+
+            self.assertEqual(db.getObjectIdByPath("g1"), g1_id)
+            self.assertEqual(db.getObjectIdByPath("/g1"), g1_id)
+            self.assertEqual(db.getObjectIdByPath("g1/"), g1_id)
+
+            self.assertEqual(db.getObjectIdByPath("g1/g1.1"), g1_1_id)
+            self.assertEqual(db.getObjectIdByPath("/g1/g1.1"), g1_1_id)
+            self.assertEqual(db.getObjectIdByPath("g1/g1.1/"), g1_1_id)
+
+            grp1_json = db.getObjectById(g1_id)
+            self.assertTrue("links" in grp1_json)
+            g1_links = grp1_json["links"]
+            self.assertTrue("g1.1" in g1_links)
+            g1_1_link = db.getLink(g1_id, "g1.1")
+            self.assertEqual(g1_1_link["class"], "H5L_TYPE_HARD")
+            self.assertEqual(g1_1_link["id"], g1_1_id)
+            self.assertTrue(g1_1_link["created"] > time.time() - 1.0)
+
+            db.createSoftLink(g2_id, "slink", "somewhere")
+            soft_link = db.getLink(g2_id, "slink")
+            self.assertEqual(soft_link["class"], "H5L_TYPE_SOFT")
+            self.assertEqual(soft_link["h5path"], "somewhere")
+            self.assertTrue(soft_link["created"] > time.time() - 1.0)
+
+            db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+            ext_link = db.getLink(g2_id, "extlink")
+            self.assertEqual(ext_link["class"], "H5L_TYPE_EXTERNAL")
+            self.assertEqual(ext_link["h5path"], "somewhere")
+            self.assertEqual(ext_link["file"], "someplace")
+            self.assertTrue(ext_link["created"] > time.time() - 1.0)
+
+            db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+            cust_link = db.getLink(g2_id, "cust")
+            self.assertEqual(cust_link["class"], "H5L_TYPE_USER_DEFINED")
+            self.assertEqual(cust_link["foo"], "bar")
+            self.assertTrue(cust_link["created"] > time.time() - 1.0)
+
+            links = db.getLinks(g2_id)
+            self.assertEqual(len(links), 3)
+            for title in "slink", "extlink", "cust":
+                self.assertTrue(title in links)
+
+            db.deleteLink(g2_id, "cust")
+            links = db.getLinks(g2_id)
+            self.assertEqual(len(links), 2)
+            for title in "slink", "extlink":
+                self.assertTrue(title in links)
 
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            self.assertEqual(len(rootUuid), UUID_LEN)
-            item = db.getAttributeItem("groups", rootUuid, "attr1")
+            try:
+                db.getObjectIdByPath("/g1/foo")
+                self.assertTrue(False)
+            except KeyError:
+                pass  # expected
+
+            ret = db.getLink(g2_id, "not_a_link")
+            self.assertTrue(ret is None)
+
+    def testNullSpaceAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
             self.assertTrue("shape" in item)
             shape_item = item["shape"]
             self.assertTrue("class" in shape_item)
             self.assertEqual(shape_item["class"], "H5S_NULL")
+            self.assertTrue(item["created"] > time.time() - 1.0)
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertEqual(value, None)
 
-    def testReadAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("tall.h5", "readattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            rootUuid = db.getUUIDByPath("/")
-            self.assertEqual(len(rootUuid), UUID_LEN)
-            item = db.getAttributeItem("groups", rootUuid, "attr1")
-            self.assertTrue(item is not None)
-
-    def testWriteScalarAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writescalarattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
+    def testScalarAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
             dims = ()
-            datatype = "H5T_STD_I32LE"
             value = 42
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-            self.assertEqual(item["name"], "A1")
+            db.createAttribute(root_id, "A1", value, shape=dims, dtype=np.int32)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
+            self.assertEqual(len(shape_json.keys()), 1)  # just one key should be returned
+            item_type = item["type"]
+            self.assertEqual(item_type["class"], "H5T_INTEGER")
+            self.assertEqual(item_type["base"], "H5T_STD_I32LE")
+            self.assertEqual(len(item_type.keys()), 2)  # just two keys should be returned
             self.assertEqual(item["value"], 42)
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
+            self.assertTrue(item["created"] > now - 1)
             shape = item["shape"]
             self.assertEqual(shape["class"], "H5S_SCALAR")
-            item_type = item["type"]
 
             self.assertEqual(item_type["class"], "H5T_INTEGER")
             self.assertEqual(item_type["base"], "H5T_STD_I32LE")
-            self.assertEqual(
-                len(item_type.keys()), 2
-            )  # just two keys should be returned
-
-    def testWriteFixedStringAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writefixedstringattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = ()
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLPAD",
-                "length": 13,
-            }
+
+    def testFixedStringAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
             value = "Hello, world!"
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-            self.assertEqual(item["name"], "A1")
-            self.assertEqual(item["value"], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
+            db.createAttribute(root_id, "A1", value, dtype=np.dtype("S13"))  # dims, datatype, value)
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
             item_type = item["type"]
-            self.assertEqual(item_type["length"], 13)
             self.assertEqual(item_type["class"], "H5T_STRING")
             self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["length"], 13)
             self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
+            ret_value = db.getAttributeValue(root_id, "A1")
+            self.assertEqual(ret_value, value.encode("ascii"))
+
+    def testVlenAsciiAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
 
-    def testWriteFixedNullTermStringAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writefixednulltermstringattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = ()
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": 13,
-            }
             value = b"Hello, world!"
+            dt = special_dtype(vlen=bytes)
 
             # write the attribute
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
+            db.createAttribute(root_id, "A1", value, dtype=dt)
             # read it back
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-
-            self.assertEqual(item["name"], "A1")
-            # the following compare fails - see issue #34
-            # self.assertEqual(item['value'], "Hello, world!")
-            now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
             item_type = item["type"]
-            self.assertEqual(item_type["length"], 13)
             self.assertEqual(item_type["class"], "H5T_STRING")
-            # NULLTERM get's converted to NULLPAD since the numpy dtype does not
-            # support other padding conventions.
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
+            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
+            self.assertEqual(item_type["length"], "H5T_VARIABLE")
             self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-
-    def testWriteVlenStringAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writevlenstringattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = ()
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": "H5T_VARIABLE",
-            }
-
-            # value = np.string_("Hello, world!")
-            value = "Hello, world!"
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-            self.assertEqual(item["name"], "A1")
             self.assertEqual(item["value"], "Hello, world!")
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertTrue(item["created"] > now - 1)
 
-    def testReadVlenStringDataset(self):
-        item = None
-        filepath = getFile("vlen_string_dset.h5", "vlen_string_dset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/DS1")
-            item = db.getDatasetItemByUuid(dset_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 4)
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            # actual padding is SPACEPAD - See issue #32
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(0, 1),))
-            self.assertEqual(row, ["Parting"])
-
-    def testReadVlenStringDataset_utc(self):
-        item = None
-        filepath = getFile("vlen_string_dset_utc.h5", "vlen_string_dset_utc.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/ds1")
-            item = db.getDatasetItemByUuid(dset_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 2293)
+    def testVlenUtf8Attribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+
+            value = b"Hello, world!"
+            dt = special_dtype(vlen=str)
+
+            # write the attribute
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            # read it back
+            item = db.getAttribute(root_id, "A1")
+            shape_json = item["shape"]
+            self.assertEqual(shape_json["class"], "H5S_SCALAR")
             item_type = item["type"]
             self.assertEqual(item_type["class"], "H5T_STRING")
             self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
             self.assertEqual(item_type["length"], "H5T_VARIABLE")
-            # next line throws conversion error - see issue #19
-            # row = db.getDatasetValuesByUuid(dset_uuid, (slice(0, 1),))
-
-    def testReadFixedStringDataset(self):
-        item = None
-        filepath = getFile("fixed_string_dset.h5", "fixed_string_dset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/DS1")
-            item = db.getDatasetItemByUuid(dset_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 4)
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], 7)
-            row = db.getDatasetValuesByUuid(dset_uuid)
-            self.assertEqual(row, ["Parting", "is such", "sweet", "sorrow."])
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(0, 1),))
-            self.assertEqual(
-                row,
-                [
-                    "Parting",
-                ],
-            )
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(2, 3),))
-            self.assertEqual(
-                row,
-                [
-                    "sweet",
-                ],
-            )
-
-    def testReadFixedStringDatasetBinary(self):
-        item = None
-        filepath = getFile("fixed_string_dset.h5", "fixed_string_dset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            dset_uuid = db.getUUIDByPath("/DS1")
-            item = db.getDatasetItemByUuid(dset_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 4)
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLPAD")
-            self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
-            self.assertEqual(item_type["length"], 7)
-            row = db.getDatasetValuesByUuid(dset_uuid, format="binary")
-            self.assertEqual(row, b"Partingis suchsweet\x00\x00sorrow.")
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(0, 1),), format="binary")
-            self.assertEqual(row, b"Parting")
-            row = db.getDatasetValuesByUuid(dset_uuid, (slice(2, 3),), format="binary")
-            self.assertEqual(row, b"sweet\x00\x00")
-
-    def testWriteVlenUnicodeAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writevlenunicodeattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = ()
-            datatype = {
-                "charSet": "H5T_CSET_UTF8",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": "H5T_VARIABLE",
-            }
-            value = "\u6b22\u8fce\u63d0\u4ea4\u5fae\u535a\u641c\u7d22\u4f7f\u7528\u53cd\u9988\uff0c\u8bf7\u76f4\u63a5"
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-
-            self.assertEqual(item["name"], "A1")
-            self.assertEqual(item["value"], value)
-            now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertEqual(item_type["class"], "H5T_STRING")
-            self.assertEqual(item_type["strPad"], "H5T_STR_NULLTERM")
             self.assertEqual(item_type["charSet"], "H5T_CSET_UTF8")
-            self.assertEqual(item_type["length"], "H5T_VARIABLE")
+            self.assertEqual(item["value"], "Hello, world!")
+            now = int(time.time())
+            self.assertTrue(item["created"] > now - 1)
 
-    def testWriteIntAttribute(self):
-        # getAttributeItemByUuid
-        item = None
-        filepath = getFile("empty.h5", "writeintattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            dims = (5,)
-            datatype = "H5T_STD_I16LE"
+    def testIntAttribute(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
             value = [2, 3, 5, 7, 11]
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-            self.assertEqual(item["name"], "A1")
+            db.createAttribute(root_id, "A1", value, dtype=np.int16)
+            item = db.getAttribute(root_id, "A1")
             self.assertEqual(item["value"], [2, 3, 5, 7, 11])
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
+            self.assertTrue(item["created"] > now - 1)
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(item_shape["dims"], [5,])
             item_type = item["type"]
             self.assertEqual(item_type["class"], "H5T_INTEGER")
             self.assertEqual(item_type["base"], "H5T_STD_I16LE")
 
     def testCreateReferenceAttribute(self):
-        filepath = getFile("empty.h5", "createreferencedataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-
-            dims = ()  # if no space in body, default to scalar
-            rsp = db.createDataset(
-                "H5T_STD_I64LE", dims, max_shape=None, creation_props=None
-            )
-            dset_uuid = rsp["id"]
-            db.linkObject(root_uuid, dset_uuid, "DS1")
-
-            dims = (1,)
-            datatype = {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"}
-            ds1_ref = "datasets/" + dset_uuid
-            value = [
-                ds1_ref,
-            ]
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-
-            attr_type = item["type"]
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
+            db.createHardLink(root_id, "DS1", dset_id)
+
+            dt = special_dtype(ref=Reference)
+
+            ds1_ref = "datasets/" + dset_id
+            value = [ds1_ref,]
+            db.createAttribute(root_id, "A1", value, dtype=dt)
+            item = db.getAttribute(root_id, "A1")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertTrue("shape" in attr)
+
+            attr_type = attr["type"]
             self.assertEqual(attr_type["class"], "H5T_REFERENCE")
             self.assertEqual(attr_type["base"], "H5T_STD_REF_OBJ")
             attr_value = item["value"]
@@ -850,149 +261,47 @@ def testCreateReferenceAttribute(self):
             self.assertEqual(attr_value[0], ds1_ref)
 
     def testCreateVlenReferenceAttribute(self):
-        filepath = getFile("empty.h5", "createreferenceattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-
-            dims = ()  # if no space in body, default to scalar
-            rsp = db.createDataset(
-                "H5T_STD_I64LE", dims, max_shape=None, creation_props=None
-            )
-            dset_uuid = rsp["id"]
-            db.linkObject(root_uuid, dset_uuid, "DS1")
-
-            dims = (1,)
-            datatype = {
-                "class": "H5T_VLEN",
-                "base": {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"},
-            }
-            ds1_ref = "datasets/" + dset_uuid
-            value = [
-                [
-                    ds1_ref,
-                ],
-            ]
-            db.createAttribute("groups", root_uuid, "A1", dims, datatype, value)
-            item = db.getAttributeItem("groups", root_uuid, "A1")
-
-            attr_type = item["type"]
-            self.assertEqual(attr_type["class"], "H5T_VLEN")
-            base_type = attr_type["base"]
-            # todo - this should be H5T_REFERENCE, not H5T_OPAQUE
-            # See h5py issue: https://github.com/h5py/h5py/issues/553
-            import h5py
-
-            # test based on h5py version until we change install requirements
-            if h5py.version.version_tuple >= (2, 6, 0):
-                self.assertEqual(base_type["class"], "H5T_REFERENCE")
-            else:
-                self.assertEqual(base_type["class"], "H5T_OPAQUE")
-
-    def testCreateReferenceListAttribute(self):
-        filepath = getFile("empty.h5", "createreferencelistattribute.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-
-            dims = (10,)
-
-            rsp = db.createDataset(
-                "H5T_STD_I64LE", dims, max_shape=None, creation_props=None
-            )
-            dset_uuid = rsp["id"]
-            db.linkObject(root_uuid, dset_uuid, "dset")
-
-            rsp = db.createDataset(
-                "H5T_STD_I64LE", dims, max_shape=None, creation_props=None
-            )
-            xscale_uuid = rsp["id"]
-            nullterm_string_type = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "length": 16,
-                "strPad": "H5T_STR_NULLTERM",
-            }
-            scalar_dims = ()
-            db.createAttribute(
-                "datasets",
-                xscale_uuid,
-                "CLASS",
-                scalar_dims,
-                nullterm_string_type,
-                "DIMENSION_SCALE",
-            )
-            db.linkObject(root_uuid, xscale_uuid, "xscale")
-
-            ref_dims = (1,)
-            datatype = {
-                "class": "H5T_VLEN",
-                "base": {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"},
-            }
-            xscale_ref = "datasets/" + xscale_uuid
-            value = [
-                (xscale_ref,),
-            ]
-            db.createAttribute(
-                "datasets", dset_uuid, "DIMENSION_LIST", ref_dims, datatype, value
-            )
-            item = db.getAttributeItem("datasets", dset_uuid, "DIMENSION_LIST")
-
-            attr_type = item["type"]
-            self.assertEqual(attr_type["class"], "H5T_VLEN")
-            base_type = attr_type["base"]
-            # todo - this should be H5T_REFERENCE, not H5T_OPAQUE
-            self.assertEqual(base_type["class"], "H5T_REFERENCE")
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset(shape=(), dtype=np.int32)
+            db.createHardLink(root_id, "DS1", dset_id)
+            grp_id = db.createGroup()
+            db.createHardLink(root_id, "G1", grp_id)
+
+            dt_base = special_dtype(ref=Reference)
+            dt = special_dtype(vlen=dt_base)
+
+            ds1_ref = "datasets/" + dset_id
+            grp_ref = "groups/" + grp_id
+            ref_arr = np.zeros((2,), dtype=dt_base)
+            ref_arr[0] = ds1_ref
+            ref_arr[1] = grp_ref
+            vlen_arr = np.zeros((), dtype=dt)
+            vlen_arr[()] = ref_arr
+
+            db.createAttribute(root_id, "A1", vlen_arr)
+            item = db.getAttribute(root_id, "A1")
 
-    def testReadCommittedType(self):
-        filepath = getFile("committed_type.h5", "readcommitted_type.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            type_uuid = db.getUUIDByPath("/Sensor_Type")
-            item = db.getCommittedTypeItemByUuid(type_uuid)
-            self.assertTrue("type" in item)
             item_type = item["type"]
-            self.assertTrue(item_type["class"], "H5T_COMPOUND")
-            ds1_uuid = db.getUUIDByPath("/DS1")
-            item = db.getDatasetItemByUuid(ds1_uuid)
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SIMPLE")
-            dims = shape["dims"]
-            self.assertEqual(len(dims), 1)
-            self.assertEqual(dims[0], 4)
-            item_type = item["type"]
-            self.assertTrue("class" in item_type)
-            self.assertEqual(item_type["class"], "H5T_COMPOUND")
-            self.assertTrue("uuid" in item_type)
-            self.assertEqual(item_type["uuid"], type_uuid)
+            self.assertEqual(item_type["class"], "H5T_VLEN")
+            self.assertEqual(item_type["size"], "H5T_VARIABLE")
+            base_type = item_type["base"]
+            self.assertEqual(base_type["class"], "H5T_REFERENCE")
+            self.assertEqual(base_type["base"], "H5T_STD_REF_OBJ")
 
-            item = db.getAttributeItem("groups", root_uuid, "attr1")
-            shape = item["shape"]
-            self.assertEqual(shape["class"], "H5S_SCALAR")
-            item_type = item["type"]
-            self.assertTrue("class" in item_type)
-            self.assertEqual(item_type["class"], "H5T_COMPOUND")
-            self.assertTrue("uuid" in item_type)
-            self.assertEqual(item_type["uuid"], type_uuid)
-
-    def testWriteCommittedType(self):
-        filepath = getFile("empty.h5", "writecommittedtype.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            self.assertTrue(len(root_uuid) >= 36)
-            datatype = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": 15,
-            }
-            item = db.createCommittedType(datatype)
-            type_uuid = item["id"]
-            item = db.getCommittedTypeItemByUuid(type_uuid)
-            self.assertEqual(item["id"], type_uuid)
-            self.assertEqual(item["attributeCount"], 0)
+            item_shape = item["shape"]
+            self.assertEqual(item_shape["class"], "H5S_SCALAR")
+
+    def testCommittedType(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dt = np.dtype("S15")
+
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            self.assertEqual(len(item["alias"]), 0)  # anonymous, so no alias
+            self.assertTrue(item["created"] > now - 1)
 
             item_type = item["type"]
 
@@ -1001,318 +310,123 @@ def testWriteCommittedType(self):
             self.assertEqual(item_type["charSet"], "H5T_CSET_ASCII")
             self.assertEqual(item_type["length"], 15)
 
-    def testWriteCommittedCompoundType(self):
-        filepath = getFile("empty.h5", "writecommittedcompoundtype.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            self.assertTrue(len(root_uuid) >= 36)
-
-            datatype = {"class": "H5T_COMPOUND", "fields": []}
-
-            fixed_str_type = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "strPad": "H5T_STR_NULLTERM",
-                "length": 15,
-            }
-
-            var_str_type = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "length": "H5T_VARIABLE",
-                "strPad": "H5T_STR_NULLTERM",
-            }
-            type_fields = []
-            type_fields.append({"name": "field_1", "type": "H5T_STD_I64BE"})
-            type_fields.append({"name": "field_2", "type": "H5T_IEEE_F64BE"})
-            type_fields.append({"name": "field_3", "type": fixed_str_type})
-            type_fields.append({"name": "field_4", "type": var_str_type})
-            datatype["fields"] = type_fields
-
-            item = db.createCommittedType(datatype)
-            type_uuid = item["id"]
-            item = db.getCommittedTypeItemByUuid(type_uuid)
-            self.assertEqual(item["id"], type_uuid)
-            self.assertEqual(item["attributeCount"], 0)
+            # create an attribute using the committed type
+            db.createAttribute(root_id, "A1", "hello world!", dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], "hello world!")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_STRING")
+            self.assertEqual(attr_type["length"], 15)
+            self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII")
+
+    def testCommittedCompoundType(self):
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+
+            dt_str = special_dtype(vlen=str)
+            fields = []
+            fields.append(("field_1", np.dtype(">i8")))
+            fields.append(("field_2", ">f8"))
+            fields.append(("field_3", np.dtype("S15")))
+            fields.append(("field_4", dt_str))
+            dt = np.dtype(fields)
+
+            ctype_id = db.createCommittedType(dt)
+            db.createHardLink(root_id, "ctype", ctype_id)
+            item = db.getObjectById(ctype_id)
             now = int(time.time())
-            self.assertTrue(item["ctime"] > now - 5)
-            self.assertTrue(item["mtime"] > now - 5)
-            self.assertEqual(len(item["alias"]), 0)  # anonymous, so no alias
+            self.assertTrue(item["created"] > now - 1)
 
             item_type = item["type"]
 
             self.assertEqual(item_type["class"], "H5T_COMPOUND")
             fields = item_type["fields"]
             self.assertEqual(len(fields), 4)
-            # todo - the last field class should be H5T_STRING, but it is getting
-            # saved to HDF5 as Opaque - see: https://github.com/h5py/h5py/issues/613
-            # this is fixed in h5py v. 2.6.0 - check the version until 2.6.0 becomes
-            # available via pip and anaconda.
-            import h5py
-
-            if h5py.version.version_tuple >= (2, 6, 0):
-                field_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING", "H5T_STRING")
-            else:
-                field_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING", "H5T_OPAQUE")
-            for i in range(4):
-                field = fields[i]
-                self.assertEqual(field["name"], "field_" + str(i + 1))
-                field_type = field["type"]
-                self.assertEqual(field_type["class"], field_classes[i])
-
-    def testToRef(self):
-
-        filepath = getFile("empty.h5", "toref.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            type_item = {
-                "order": "H5T_ORDER_LE",
-                "base_size": 1,
-                "class": "H5T_INTEGER",
-                "base": "H5T_STD_I8LE",
-                "size": 1,
-            }
-            data_list = [2, 3, 5, 7, 11]
-            ref_value = db.toRef(1, type_item, data_list)
-            self.assertEqual(ref_value, data_list)
-
-            type_item = {
-                "charSet": "H5T_CSET_ASCII",
-                "class": "H5T_STRING",
-                "length": 8,
-                "strPad": "H5T_STR_NULLPAD",
-            }
-            data_list = ["Hypertext", "as", "engine", "of", "state"]
-            ref_value = db.toRef(1, type_item, data_list)
-
-    def testToTuple(self):
-        filepath = getFile("empty.h5", "totuple.h5")
-        data1d = [1, 2, 3]
-        data2d = [[1, 2], [3, 4]]
-        data3d = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            self.assertEqual(db.toTuple(1, data1d), [1, 2, 3])
-            self.assertEqual(db.toTuple(2, data2d), [[1, 2], [3, 4]])
-            self.assertEqual(db.toTuple(1, data2d), [(1, 2), (3, 4)])
-            self.assertEqual(
-                db.toTuple(3, data3d), [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
-            )
-            self.assertEqual(
-                db.toTuple(2, data3d), [[(1, 2), (3, 4)], [(5, 6), (7, 8)]]
-            )
-            self.assertEqual(
-                db.toTuple(1, data3d), [((1, 2), (3, 4)), ((5, 6), (7, 8))]
-            )
-
-    def testBytesArrayToList(self):
-        filepath = getFile("empty.h5", "bytestostring.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-
-            val = db.bytesArrayToList(b"Hello")
-            self.assertTrue(type(val) is str)
-            val = db.bytesArrayToList(
-                [
-                    b"Hello",
-                ]
-            )
-            self.assertEqual(len(val), 1)
-            self.assertTrue(type(val[0]) is str)
-            self.assertEqual(val[0], "Hello")
-
-            import numpy as np
-
-            data = np.array([b"Hello"])
-            val = db.bytesArrayToList(data)
-            self.assertEqual(len(val), 1)
-            self.assertTrue(type(val[0]) is str)
-            self.assertEqual(val[0], "Hello")
-
-    def testGetDataValue(self):
-        # typeItem, value, dimension=0, dims=None):
-        filepath = getFile("empty.h5", "bytestostring.h5")
-        string_type = {
-            "charSet": "H5T_CSET_ASCII",
-            "class": "H5T_STRING",
-            "strPad": "H5T_STR_NULLTERM",
-            "length": "H5T_VARIABLE",
-        }
-
-        with Hdf5db(filepath, app_logger=self.log) as db:
-
-            import numpy as np
-
-            data = np.array([b"Hello"])
-            val = db.getDataValue(string_type, data, dimension=1, dims=(1,))
-            self.assertTrue(type(val[0]) is str)
-
-    def testGetAclDataset(self):
-        filepath = getFile("tall.h5", "getacldataset.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            d111_uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-            acl_dset = db.getAclDataset(d111_uuid, create=True)
-            self.assertTrue(acl_dset.name.endswith(d111_uuid))
-            self.assertEqual(len(acl_dset.dtype), 7)
-            self.assertEqual(len(acl_dset.shape), 1)
-            self.assertEqual(acl_dset.shape[0], 0)
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-
-    def testSetAcl(self):
-        filepath = getFile("tall.h5", "setacl.h5")
-        user1 = 123
-        user2 = 456
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            d111_uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-
-            # add read/write acl for user1
-            acl_user1 = db.getAcl(d111_uuid, user1)
-
-            self.assertEqual(acl_user1["userid"], 0)
-            acl_user1["userid"] = user1
-            acl_user1["readACL"] = 0
-            acl_user1["updateACL"] = 0
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-
-            db.setAcl(d111_uuid, acl_user1)
-            acl = db.getAcl(d111_uuid, user1)
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 1)
-
-            # add read-only acl for user2
-            acl_user2 = db.getAcl(d111_uuid, user2)
-            self.assertEqual(acl_user2["userid"], 0)
-            acl_user2["userid"] = user2
-            acl_user2["create"] = 0
-            acl_user2["read"] = 1
-            acl_user2["update"] = 0
-            acl_user2["delete"] = 0
-            acl_user2["readACL"] = 0
-            acl_user2["updateACL"] = 0
-            db.setAcl(d111_uuid, acl_user2)
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 2)
-
-            # fetch and verify acls
-            acl = db.getAcl(d111_uuid, user1)
-            self.assertEqual(acl["userid"], user1)
-            self.assertEqual(acl["create"], 1)
-            self.assertEqual(acl["read"], 1)
-            self.assertEqual(acl["update"], 1)
-            self.assertEqual(acl["delete"], 1)
-            self.assertEqual(acl["readACL"], 0)
-            self.assertEqual(acl["updateACL"], 0)
-
-            acl = db.getAcl(d111_uuid, user2)
-            self.assertEqual(acl["userid"], user2)
-            self.assertEqual(acl["create"], 0)
-            self.assertEqual(acl["read"], 1)
-            self.assertEqual(acl["update"], 0)
-            self.assertEqual(acl["delete"], 0)
-            self.assertEqual(acl["readACL"], 0)
-            self.assertEqual(acl["updateACL"], 0)
-
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 2)
-
-            # get acl data_list
-            acls = db.getAcls(d111_uuid)
-            self.assertEqual(len(acls), 2)
-
-    def testRootAcl(self):
-        filepath = getFile("tall.h5", "rootacl.h5")
-        user1 = 123
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            root_uuid = db.getUUIDByPath("/")
-            d111_uuid = db.getUUIDByPath("/g1/g1.1/dset1.1.1")
-            num_acls = db.getNumAcls(d111_uuid)
-            self.assertEqual(num_acls, 0)
-
-            # add read/write acl for user1 at root
-            acl_root = db.getAcl(root_uuid, 0)
-            self.assertEqual(acl_root["userid"], 0)
-            acl_root["create"] = 0
-            acl_root["read"] = 1
-            acl_root["update"] = 0
-            acl_root["delete"] = 0
-            acl_root["readACL"] = 0
-            acl_root["updateACL"] = 0
-            num_acls = db.getNumAcls(root_uuid)
-            self.assertEqual(num_acls, 0)
-
-            db.setAcl(root_uuid, acl_root)
-            num_acls = db.getNumAcls(root_uuid)
-            self.assertEqual(num_acls, 1)
-
-            acl = db.getAcl(d111_uuid, user1)
-            num_acls = db.getNumAcls(d111_uuid)  # this will fetch the root acl
-            self.assertEqual(num_acls, 0)
-            self.assertEqual(acl["userid"], 0)
-            self.assertEqual(acl["create"], 0)
-            self.assertEqual(acl["read"], 1)
-            self.assertEqual(acl["update"], 0)
-            self.assertEqual(acl["delete"], 0)
-            self.assertEqual(acl["readACL"], 0)
-            self.assertEqual(acl["updateACL"], 0)
-
-    def testGetEvalStr(self):
-        queries = {
-            "date == 23": "rows['date'] == 23",
-            "wind == b'W 5'": "rows['wind'] == b'W 5'",
-            "temp > 61": "rows['temp'] > 61",
-            "(date >=22) & (date <= 24)": "(rows['date'] >=22) & (rows['date'] <= 24)",
-            "(date == 21) & (temp > 70)": "(rows['date'] == 21) & (rows['temp'] > 70)",
-            "(wind == b'E 7') | (wind == b'S 7')": "(rows['wind'] == b'E 7') | (rows['wind'] == b'S 7')",
-        }
-
-        fields = ["date", "wind", "temp"]
-        filepath = getFile("empty.h5", "getevalstring.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-
-            for query in queries.keys():
-                eval_str = db._getEvalStr(query, fields)
-                self.assertEqual(eval_str, queries[query])
-                # print(query, "->", eval_str)
-
-    def testBadQuery(self):
-        queries = (
-            "foobar",  # no variable used
-            "wind = b'abc",  # non-closed literal
-            "(wind = b'N') & (temp = 32",  # missing paren
-            "foobar > 42",  # invalid field name
-            "import subprocess; subprocess.call(['ls', '/'])",
-        )  # injection attack
-
-        fields = ("date", "wind", "temp")
-        filepath = getFile("empty.h5", "badquery.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-            for query in queries:
-                try:
-                    eval_str = db._getEvalStr(query, fields)
-                    self.log.error(f"got eval_str: {eval_str}")
-                    self.assertTrue(False)  # shouldn't get here
-                except IOError:
-                    pass  # ok
-
-    def testInjectionBlock(self):
-        queries = (
-            "import subprocess; subprocess.call(['ls', '/'])",
-        )  # injection attack
-
-        fields = ("import", "subprocess", "call")
-        filepath = getFile("empty.h5", "injectionblock.h5")
-        with Hdf5db(filepath, app_logger=self.log) as db:
-
-            for query in queries:
-                try:
-                    eval_str = db._getEvalStr(query, fields)
-                    self.log.error(f"got eval_str: {eval_str}")
-                    self.assertTrue(False)  # shouldn't get here
-                except IOError:
-                    pass  # ok
+
+            # create an attribute using the committed type
+            attr_value = (42, 3.14, "circle", "area = R^2 * PI")
+            db.createAttribute(root_id, "A1", attr_value, dtype=f"datatypes/{ctype_id}")
+            attr = db.getAttribute(root_id, "A1")
+            self.assertEqual(attr["value"], list(attr_value))
+            attr_shape = attr["shape"]
+            self.assertEqual(attr_shape["class"], "H5S_SCALAR")
+
+            attr_type = attr["type"]
+            self.assertEqual(attr_type["class"], "H5T_COMPOUND")
+
+            value = db.getAttributeValue(root_id, "A1")
+            self.assertTrue(isinstance(value, np.ndarray))
+
+    def testSimpleDataset(self):
+        with Hdf5db(app_logger=self.log) as db:
+            nrows = 8
+            ncols = 10
+            shape = (nrows, ncols)
+            dtype = np.int32
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset(shape, dtype=dtype)
+            db.createHardLink(root_id, "dset", dset_id)
+            db.createAttribute(dset_id, "a1", "Hello, world")
+            sel_all = selections.select(shape, ...)
+            arr = db.getDatasetValues(dset_id, sel_all)
+            self.assertEqual(arr.dtype, dtype)
+            self.assertEqual(arr.shape, shape)
+            self.assertEqual(arr.min(), 0)
+            self.assertEqual(arr.max(), 0)
+            row = np.zeros((ncols,), dtype=dtype)
+            for i in range(nrows):
+                row[:] = list(range(i * 10, (i + 1) * 10))
+                row_sel = selections.select(shape, (slice(i, i + 1), slice(0, ncols)))
+                db.setDatasetValues(dset_id, row_sel, row)
+            arr = db.getDatasetValues(dset_id, sel_all)
+            for i in range(nrows):
+                row = np.array(list(range(i * 10, (i + 1) * 10)), dtype=dtype)
+                np.testing.assert_array_equal(arr[i, :], row)
+
+    def testScalarDataset(self):
+        dtype = np.int32
+        with Hdf5db(app_logger=self.log) as db:
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset((), dtype=dtype)
+            db.createHardLink(root_id, "dset", dset_id)
+            db.createAttribute(dset_id, "a1", "Hello, world")
+            sel_all = selections.select((), ...)
+            arr = db.getDatasetValues(dset_id, sel_all)
+            self.assertEqual(arr.dtype, dtype)
+            self.assertEqual(arr.shape, ())
+            self.assertEqual(arr[()], 0)
+            db.setDatasetValues(dset_id, sel_all, np.array(42, dtype=dtype))
+            arr = db.getDatasetValues(dset_id, sel_all)
+            self.assertEqual(arr.dtype, dtype)
+            self.assertEqual(arr.shape, ())
+            self.assertEqual(arr.min(), 42)
+            self.assertEqual(arr.max(), 42)
+
+    def testResizableDataset(self):
+        with Hdf5db(app_logger=self.log) as db:
+            nrows = 8
+            ncols = 10
+            shape = (nrows, ncols)
+            dtype = np.int32
+            maxdims = (None, ncols * 2)
+            root_id = db.getObjectIdByPath("/")
+            dset_id = db.createDataset(shape, maxdims=maxdims, dtype=dtype)
+            db.createHardLink(root_id, "dset", dset_id)
+            db.createAttribute(dset_id, "a1", "Hello, world")
+
+            # resize limited dimension
+            db.resizeDataset(dset_id, (nrows, ncols * 2))
+
+            # try to go beyond max extent
+            try:
+                db.resizeDataset(dset_id, (nrows, ncols * 3))
+                self.assertTrue(False)
+            except ValueError:
+                pass  # expected
+
+            # resize unlimited dimension
+            db.resizeDataset(dset_id, (nrows * 10, ncols))
 
 
 if __name__ == "__main__":
diff --git a/test/unit/hdf5dtype_test.py b/test/unit/hdf5dtype_test.py
index 0f67d7b..fc0ffb4 100755
--- a/test/unit/hdf5dtype_test.py
+++ b/test/unit/hdf5dtype_test.py
@@ -2,8 +2,8 @@
 # Copyright by The HDF Group.                                                #
 # All rights reserved.                                                       #
 #                                                                            #
-# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
-# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
 # terms governing use, modification, and redistribution, is contained in     #
 # the file COPYING, which can be found at the root of the source code        #
 # distribution tree.  If you do not have access to this file, you may        #
@@ -12,11 +12,14 @@
 import unittest
 import logging
 import numpy as np
-from h5py import special_dtype
-from h5py import check_dtype
-from h5py import Reference
-from h5py import RegionReference
+
 from h5json import hdf5dtype
+from h5json.hdf5dtype import special_dtype
+from h5json.hdf5dtype import check_dtype
+from h5json.hdf5dtype import Reference
+from h5json.hdf5dtype import RegionReference
+from h5json.hdf5dtype import isOpaqueDtype
+from h5json.hdf5dtype import isVlen
 
 
 class Hdf5dtypeTest(unittest.TestCase):
@@ -26,6 +29,31 @@ def __init__(self, *args, **kwargs):
         self.logger = logging.getLogger()
         self.logger.setLevel(logging.INFO)
 
+    def testGetBaseTypeJson(self):
+        type_json = hdf5dtype.getBaseTypeJson("H5T_IEEE_F64LE")
+        self.assertTrue("class" in type_json)
+        self.assertEqual(type_json["class"], "H5T_FLOAT")
+        self.assertTrue("base" in type_json)
+        self.assertEqual(type_json["base"], "H5T_IEEE_F64LE")
+
+        type_json = hdf5dtype.getBaseTypeJson("H5T_IEEE_F16LE")
+        self.assertTrue("class" in type_json)
+        self.assertEqual(type_json["class"], "H5T_FLOAT")
+        self.assertTrue("base" in type_json)
+        self.assertEqual(type_json["base"], "H5T_IEEE_F16LE")
+
+        type_json = hdf5dtype.getBaseTypeJson("H5T_STD_I32LE")
+        self.assertTrue("class" in type_json)
+        self.assertEqual(type_json["class"], "H5T_INTEGER")
+        self.assertTrue("base" in type_json)
+        self.assertEqual(type_json["base"], "H5T_STD_I32LE")
+
+        try:
+            hdf5dtype.getBaseTypeJson("foobar")
+            self.assertTrue(False)
+        except TypeError:
+            pass  # expected
+
     def testBaseIntegerTypeItem(self):
         dt = np.dtype("<i1")
         typeItem = hdf5dtype.getTypeItem(dt)
@@ -44,6 +72,15 @@ def testBaseFloatTypeItem(self):
         self.assertEqual(typeItem["class"], "H5T_FLOAT")
         self.assertEqual(typeItem["base"], "H5T_IEEE_F64LE")
 
+    def testBaseFloat16TypeItem(self):
+        dt = np.dtype("<f2")
+        typeItem = hdf5dtype.getTypeItem(dt)
+        self.assertEqual(typeItem["class"], "H5T_FLOAT")
+        self.assertEqual(typeItem["base"], "H5T_IEEE_F16LE")
+        typeItem = hdf5dtype.getTypeResponse(typeItem)  # non-verbose format
+        self.assertEqual(typeItem["class"], "H5T_FLOAT")
+        self.assertEqual(typeItem["base"], "H5T_IEEE_F16LE")
+
     def testBaseStringTypeItem(self):
         dt = np.dtype("S3")
         typeItem = hdf5dtype.getTypeItem(dt)
@@ -54,12 +91,12 @@ def testBaseStringTypeItem(self):
 
     def testBaseStringUTFTypeItem(self):
         dt = np.dtype("U3")
-        try:
-            # typeItem = hdf5dtype.getTypeItem(dt)
-            hdf5dtype.getTypeItem(dt)
-            self.assertTrue(False)  # expected exception
-        except TypeError:
-            pass  # expected
+        typeItem = hdf5dtype.getTypeItem(dt)
+        self.assertEqual(typeItem["class"], "H5T_STRING")
+        # type item length in bytes (may no actual be enough space for some UTF strings)
+        self.assertEqual(typeItem["length"], 12)
+        self.assertEqual(typeItem["strPad"], "H5T_STR_NULLPAD")
+        self.assertEqual(typeItem["charSet"], "H5T_CSET_UTF8")
 
     def testBaseVLenAsciiTypeItem(self):
         dt = special_dtype(vlen=bytes)
@@ -91,8 +128,9 @@ def testBaseEnumTypeItem(self):
         self.assertEqual(baseItem["class"], "H5T_INTEGER")
         self.assertEqual(baseItem["base"], "H5T_STD_I8LE")
         self.assertTrue("members" in typeItem)
-        mapp_out = dict((m["name"], m["value"]) for m in typeItem["members"])
-        self.assertEqual(mapp_out["GREEN"], 1)
+        members = typeItem["members"]
+        expected = [{'name': 'RED', 'value': 0}, {'name': 'GREEN', 'value': 1}, {'name': 'BLUE', 'value': 2}]
+        self.assertEqual(members, expected)
         self.assertEqual(typeSize, 1)
 
     def testBaseBoolTypeItem(self):
@@ -105,9 +143,8 @@ def testBaseBoolTypeItem(self):
         self.assertTrue("members" in typeItem)
         members = typeItem["members"]
         self.assertEqual(len(members), 2)
-        mapping = dict((m["name"], m["value"]) for m in members)
-        self.assertEqual(mapping["FALSE"], 0)
-        self.assertEqual(mapping["TRUE"], 1)
+        self.assertEqual(members[0], {"name": "FALSE", "value": 0})
+        self.assertEqual(members[1], {"name": "TRUE", "value": 1})
         self.assertEqual(typeSize, 1)
 
     def testBaseArrayTypeItem(self):
@@ -115,27 +152,31 @@ def testBaseArrayTypeItem(self):
         typeItem = hdf5dtype.getTypeItem(dt)
         typeSize = hdf5dtype.getItemSize(typeItem)
         self.assertEqual(typeItem["class"], "H5T_ARRAY")
+        self.assertTrue("dims" in typeItem)
+        self.assertEqual(typeItem["dims"], (2, 2,))
         baseItem = typeItem["base"]
         self.assertEqual(baseItem["class"], "H5T_INTEGER")
         self.assertEqual(baseItem["base"], "H5T_STD_I32LE")
         self.assertEqual(typeSize, 16)
 
     def testObjReferenceTypeItem(self):
-        # dt = np.dtype('S48', metadata={'ref': val.__class__})
         dt = special_dtype(ref=Reference)
         typeItem = hdf5dtype.getTypeItem(dt)
         typeSize = hdf5dtype.getItemSize(typeItem)
         self.assertEqual(typeItem["class"], "H5T_REFERENCE")
         self.assertEqual(typeItem["base"], "H5T_STD_REF_OBJ")
-        self.assertEqual(typeSize, "H5T_VARIABLE")
+        # length of obj id, e.g.:
+        # g-b2c9a750-a557-11e7-ab09-0242ac110009
+        self.assertEqual(typeSize, 48)
 
     def testRegionReferenceTypeItem(self):
         dt = special_dtype(ref=RegionReference)
         typeItem = hdf5dtype.getTypeItem(dt)
         typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, 48)
         self.assertEqual(typeItem["class"], "H5T_REFERENCE")
-        self.assertEqual(typeItem["base"], "H5T_STD_REF_DSETREG")
-        self.assertEqual(typeSize, "H5T_VARIABLE")
+        # self.assertEqual(typeItem['base'], 'H5T_STD_REF_DSETREG')
+        # self.assertEqual(typeSize, 'H5T_VARIABLE')
 
     def testCompoundArrayTypeItem(self):
         dt = np.dtype([("a", "<i1"), ("b", "S1", (10,))])
@@ -157,6 +198,32 @@ def testCompoundArrayTypeItem(self):
         self.assertEqual(field_b_basetype["class"], "H5T_STRING")
         self.assertEqual(typeSize, 11)
 
+    def testEnumArrayTypeItem(self):
+        mapping = {"RED": 0, "GREEN": 1, "BLUE": 2}
+        dt_enum = special_dtype(enum=(np.int8, mapping))
+        typeItem = hdf5dtype.getTypeItem(dt_enum)
+        dt_array = np.dtype("(2,3)" + dt_enum.str, metadata=dict(dt_enum.metadata))
+
+        typeItem = hdf5dtype.getTypeItem(dt_array)
+
+        self.assertEqual(typeItem["class"], "H5T_ARRAY")
+        self.assertTrue("dims" in typeItem)
+        self.assertEqual(typeItem["dims"], (2, 3))
+        baseItem = typeItem["base"]
+        self.assertEqual(baseItem["class"], "H5T_ENUM")
+        self.assertTrue("members" in baseItem)
+        members = baseItem["members"]
+        self.assertEqual(len(members), 3)
+        self.assertEqual(members[0], {"name": "RED", "value": 0})
+        self.assertEqual(members[1], {"name": "GREEN", "value": 1})
+        self.assertEqual(members[2], {"name": "BLUE", "value": 2})
+        self.assertTrue("base" in baseItem)
+        basePrim = baseItem["base"]
+        self.assertEqual(basePrim["class"], "H5T_INTEGER")
+        self.assertEqual(basePrim["base"], "H5T_STD_I8LE")
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, 6)  # one-byte for base enum type * shape of (2,3)
+
     def testCompoundArrayVlenIntTypeItem(self):
         dt_vlen = special_dtype(vlen=np.int32)
         dt_arr = np.dtype((dt_vlen, (4,)))
@@ -165,7 +232,8 @@ def testCompoundArrayVlenIntTypeItem(self):
         )
         typeItem = hdf5dtype.getTypeItem(dt_compound)
 
-        # typeSize = hdf5dtype.getItemSize(typeItem)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
         self.assertEqual(typeItem["class"], "H5T_COMPOUND")
         fields = typeItem["fields"]
         field_a = fields[0]
@@ -182,12 +250,12 @@ def testCompoundArrayVlenIntTypeItem(self):
         field_c_type = field_c["type"]
         self.assertEqual(field_c_type["class"], "H5T_ARRAY")
         self.assertEqual(field_c_type["dims"], (4,))
-        field_c_base_type = field_c_type["base"]
-        self.assertEqual(field_c_base_type["class"], "H5T_VLEN")
-        self.assertEqual(field_c_base_type["size"], "H5T_VARIABLE")
-        field_c_base_base_type = field_c_base_type["base"]
-        self.assertEqual(field_c_base_base_type["class"], "H5T_INTEGER")
-        self.assertEqual(field_c_base_base_type["base"], "H5T_STD_I32LE")
+        field_c_type_base = field_c_type["base"]
+        self.assertEqual(field_c_type_base["class"], "H5T_VLEN")
+        self.assertEqual(field_c_type_base["size"], "H5T_VARIABLE")
+        field_c_type_base_base = field_c_type_base["base"]
+        self.assertEqual(field_c_type_base_base["class"], "H5T_INTEGER")
+        self.assertEqual(field_c_type_base_base["base"], "H5T_STD_I32LE")
 
     def testCompoundArrayVlenStringTypeItem(self):
         dt_vlen = special_dtype(vlen=bytes)
@@ -197,7 +265,8 @@ def testCompoundArrayVlenStringTypeItem(self):
         )
         typeItem = hdf5dtype.getTypeItem(dt_compound)
 
-        # typeSize = hdf5dtype.getItemSize(typeItem)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
         self.assertEqual(typeItem["class"], "H5T_COMPOUND")
         fields = typeItem["fields"]
         field_a = fields[0]
@@ -220,8 +289,35 @@ def testCompoundArrayVlenStringTypeItem(self):
         self.assertEqual(field_c_base_type["length"], "H5T_VARIABLE")
         self.assertEqual(field_c_base_type["charSet"], "H5T_CSET_ASCII")
 
+    def testCompoundArrayVlenStr(self):
+        dt_str = special_dtype(vlen=str)
+        dt_arr_str = np.dtype((dt_str, (3, 2)))
+        dt_compound = np.dtype([("VALUE1", "i4"), ("VALUE2", dt_arr_str)])
+        self.assertTrue(isVlen(dt_compound))
+        type_item = hdf5dtype.getTypeItem(dt_compound)
+        typeSize = hdf5dtype.getItemSize(type_item)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertEqual(type_item["class"], "H5T_COMPOUND")
+        fields = type_item["fields"]
+        field_a = fields[0]
+        self.assertEqual(field_a["name"], "VALUE1")
+        field_a_type = field_a["type"]
+        self.assertEqual(field_a_type["class"], "H5T_INTEGER")
+        self.assertEqual(field_a_type["base"], "H5T_STD_I32LE")
+
+        field_b = fields[1]
+        field_b_type = field_b["type"]
+
+        self.assertEqual(field_b_type["class"], "H5T_ARRAY")
+        self.assertEqual(field_b_type["dims"], (3, 2))
+        field_b_base_type = field_b_type["base"]
+        self.assertEqual(field_b_base_type["class"], "H5T_STRING")
+        self.assertEqual(field_b_base_type["length"], "H5T_VARIABLE")
+        self.assertEqual(field_b_base_type["charSet"], "H5T_CSET_UTF8")
+
     def testOpaqueTypeItem(self):
         dt = np.dtype("V200")
+        self.assertTrue(isOpaqueDtype(dt))
         typeItem = hdf5dtype.getTypeItem(dt)
         typeSize = hdf5dtype.getItemSize(typeItem)
         self.assertEqual(typeItem["class"], "H5T_OPAQUE")
@@ -281,7 +377,23 @@ def testCompoundTypeItem(self):
         self.assertEqual(tempFieldType["base"], "H5T_IEEE_F32LE")
         self.assertEqual(typeSize, 10)
 
-    def testCompoundofCompoundTypeItem(self):
+    def testCompoundOnfFieldTypeItem(self):
+        dt = np.dtype([("temp", np.float32),])
+        typeItem = hdf5dtype.getTypeItem(dt)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeItem["class"], "H5T_COMPOUND")
+        self.assertTrue("fields" in typeItem)
+        fields = typeItem["fields"]
+        self.assertEqual(len(fields), 1)
+        tempField = fields[0]
+        self.assertEqual(tempField["name"], "temp")
+        self.assertTrue("type" in tempField)
+        tempFieldType = tempField["type"]
+        self.assertEqual(tempFieldType["class"], "H5T_FLOAT")
+        self.assertEqual(tempFieldType["base"], "H5T_IEEE_F32LE")
+        self.assertEqual(typeSize, 4)
+
+    def testCompoundOfCompoundTypeItem(self):
         dt1 = np.dtype([("x", np.float32), ("y", np.float32)])
         dt2 = np.dtype([("a", np.float32), ("b", np.float32), ("c", np.float32)])
         dt = np.dtype([("field1", dt1), ("field2", dt2)])
@@ -309,6 +421,7 @@ def testCreateBaseType(self):
         self.assertEqual(dt.name, "uint32")
         self.assertEqual(dt.byteorder, ">")
         self.assertEqual(dt.kind, "u")
+        self.assertFalse(isVlen(dt))
 
         dt = hdf5dtype.createDataType("H5T_STD_I16LE")
         self.assertEqual(dt.name, "int16")
@@ -317,10 +430,12 @@ def testCreateBaseType(self):
         dt = hdf5dtype.createDataType("H5T_IEEE_F64LE")
         self.assertEqual(dt.name, "float64")
         self.assertEqual(dt.kind, "f")
+        self.assertFalse(isVlen(dt))
 
         dt = hdf5dtype.createDataType("H5T_IEEE_F32LE")
         self.assertEqual(dt.name, "float32")
         self.assertEqual(dt.kind, "f")
+        self.assertFalse(isVlen(dt))
 
         typeItem = {"class": "H5T_INTEGER", "base": "H5T_STD_I32BE"}
         typeSize = hdf5dtype.getItemSize(typeItem)
@@ -328,6 +443,7 @@ def testCreateBaseType(self):
         self.assertEqual(dt.name, "int32")
         self.assertEqual(dt.kind, "i")
         self.assertEqual(typeSize, 4)
+        self.assertFalse(isVlen(dt))
 
     def testCreateBaseStringType(self):
         typeItem = {"class": "H5T_STRING", "charSet": "H5T_CSET_ASCII", "length": 6}
@@ -336,15 +452,18 @@ def testCreateBaseStringType(self):
         self.assertEqual(dt.name, "bytes48")
         self.assertEqual(dt.kind, "S")
         self.assertEqual(typeSize, 6)
+        self.assertFalse(isVlen(dt))
 
     def testCreateBaseUnicodeType(self):
-        typeItem = {"class": "H5T_STRING", "charSet": "H5T_CSET_UTF8", "length": 32}
-        try:
-            # dt = hdf5dtype.createDataType(typeItem)
-            hdf5dtype.createDataType(typeItem)
-            self.assertTrue(False)  # expected exception
-        except TypeError:
-            pass
+        typeItem = {"class": "H5T_STRING", "charSet": "H5T_CSET_UTF8", "length": 6}
+
+        dt = hdf5dtype.createDataType(typeItem)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertTrue(dt is not None)
+        self.assertEqual(dt.name, "bytes48")
+        self.assertEqual(dt.kind, "S")  # uses byte
+        self.assertEqual(typeSize, 6)
+        self.assertFalse(isVlen(dt))
 
     def testCreateNullTermStringType(self):
         typeItem = {
@@ -355,9 +474,11 @@ def testCreateNullTermStringType(self):
         }
         typeSize = hdf5dtype.getItemSize(typeItem)
         dt = hdf5dtype.createDataType(typeItem)
+
         self.assertEqual(dt.name, "bytes48")
         self.assertEqual(dt.kind, "S")
         self.assertEqual(typeSize, 6)
+        self.assertFalse(isVlen(dt))
 
     def testCreateVLenStringType(self):
         typeItem = {
@@ -371,6 +492,28 @@ def testCreateVLenStringType(self):
         self.assertEqual(dt.kind, "O")
         self.assertEqual(check_dtype(vlen=dt), bytes)
         self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertTrue(isVlen(dt))
+
+    def testCreateVLenStringArrayType(self):
+        typeItem = {
+            "class": "H5T_ARRAY",
+            "dims": (2, 2),
+            "base": {
+                "class": "H5T_STRING",
+                "charSet": "H5T_CSET_ASCII",
+                "length": "H5T_VARIABLE",
+            }
+        }
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        dt = hdf5dtype.createDataType(typeItem)
+        self.assertEqual(dt.name, "void256")  # assuming 8-byte pointers
+        self.assertEqual(dt.kind, "V")
+        self.assertEqual(dt.shape, (2, 2))
+        self.assertEqual(check_dtype(vlen=dt), None)
+        self.assertEqual(check_dtype(vlen=dt.base), bytes)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertEqual(dt.base.kind, 'O')
+        self.assertTrue(isVlen(dt))
 
     def testCreateVLenUTF8Type(self):
         typeItem = {
@@ -384,14 +527,16 @@ def testCreateVLenUTF8Type(self):
         self.assertEqual(dt.kind, "O")
         self.assertEqual(check_dtype(vlen=dt), str)
         self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertTrue(isVlen(dt))
 
     def testCreateVLenDataType(self):
         typeItem = {"class": "H5T_VLEN", "base": "H5T_STD_I32BE"}
         typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
         dt = hdf5dtype.createDataType(typeItem)
         self.assertEqual(dt.name, "object")
         self.assertEqual(dt.kind, "O")
-        self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertTrue(isVlen(dt))
 
     def testCreateOpaqueType(self):
         typeItem = {"class": "H5T_OPAQUE", "size": 200}
@@ -400,17 +545,13 @@ def testCreateOpaqueType(self):
         self.assertEqual(dt.name, "void1600")
         self.assertEqual(dt.kind, "V")
         self.assertEqual(typeSize, 200)
+        self.assertFalse(isVlen(dt))
 
     def testCreateEnumType(self):
         typeItem = {
             "class": "H5T_ENUM",
             "base": {"base": "H5T_STD_I16LE", "class": "H5T_INTEGER"},
-            "members": [
-                {"name": "GAS", "value": 2},
-                {"name": "LIQUID", "value": 1},
-                {"name": "PLASMA", "value": 3},
-                {"name": "SOLID", "value": 0},
-            ],
+            "mapping": {"GAS": 2, "LIQUID": 1, "PLASMA": 3, "SOLID": 0},
         }
 
         typeSize = hdf5dtype.getItemSize(typeItem)
@@ -424,12 +565,13 @@ def testCreateEnumType(self):
         self.assertEqual(mapping["LIQUID"], 1)
         self.assertEqual(mapping["GAS"], 2)
         self.assertEqual(mapping["PLASMA"], 3)
+        self.assertFalse(isVlen(dt))
 
     def testCreateBoolType(self):
         typeItem = {
             "class": "H5T_ENUM",
             "base": {"base": "H5T_STD_I8LE", "class": "H5T_INTEGER"},
-            "members": [{"name": "TRUE", "value": 1}, {"name": "FALSE", "value": 0}],
+            "mapping": {"TRUE": 1, "FALSE": 0},
         }
 
         typeSize = hdf5dtype.getItemSize(typeItem)
@@ -437,6 +579,38 @@ def testCreateBoolType(self):
         dt = hdf5dtype.createDataType(typeItem)
         self.assertEqual(dt.name, "bool")
         self.assertEqual(dt.kind, "b")
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertFalse(isVlen(dt))
+
+    def testCreateReferenceType(self):
+        typeItem = {
+            "class": "H5T_REFERENCE",
+            "base": "H5T_STD_REF_OBJ",
+            "length": 48,
+            "charSet": "H5T_CSET_ASCII",
+            "strPad": "H5T_STR_NULLPAD"
+        }
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, 48)
+        dt = hdf5dtype.createDataType(typeItem)
+        self.assertEqual(dt.kind, "S")
+        self.assertTrue(dt.metadata['ref'] is Reference)
+        self.assertEqual(check_dtype(ref=dt), Reference)
+        self.assertFalse(isVlen(dt))
+
+    def testCreateVlenReferenceType(self):
+        typeItem = {
+            'class': 'H5T_VLEN',
+            'base': {'class': 'H5T_REFERENCE', 'base': 'H5T_STD_REF_OBJ'}
+        }
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, 'H5T_VARIABLE')
+        dt = hdf5dtype.createDataType(typeItem)
+        self.assertEqual(dt.kind, "O")
+        base = dt.metadata['vlen']
+        self.assertTrue(base.metadata['ref'] is Reference)
+        self.assertEqual(check_dtype(ref=base), Reference)
+        self.assertTrue(isVlen(dt))
 
     def testCreateCompoundType(self):
         typeItem = {
@@ -461,11 +635,35 @@ def testCreateCompoundType(self):
         self.assertEqual(dt.name, "void144")
         self.assertEqual(dt.kind, "V")
         self.assertEqual(len(dt.fields), 4)
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertTrue(isVlen(dt))
+
         dtLocation = dt[2]
         self.assertEqual(dtLocation.name, "object")
         self.assertEqual(dtLocation.kind, "O")
         self.assertEqual(check_dtype(vlen=dtLocation), bytes)
         self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dtLocation))
+
+    def testCreateCompoundInvalidFieldName(self):
+        typeItem = {
+            "class": "H5T_COMPOUND",
+            "fields": [
+                {
+                    "name": "\u03b1",
+                    "type": {"base": "H5T_STD_I32LE", "class": "H5T_INTEGER"},
+                },
+                {
+                    "name": "\u03c9",
+                    "type": {"base": "H5T_STD_I32LE", "class": "H5T_INTEGER"},
+                },
+            ],
+        }
+        try:
+            hdf5dtype.createDataType(typeItem)
+            self.assertTrue(False)
+        except TypeError:
+            pass  # expected
 
     def testCreateCompoundOfCompoundType(self):
         typeItem = {
@@ -528,6 +726,7 @@ def testCreateCompoundOfCompoundType(self):
         self.assertEqual(dt.name, "void160")
         self.assertEqual(dt.kind, "V")
         self.assertEqual(len(dt.fields), 2)
+        self.assertFalse(isVlen(dt))
         dt_field1 = dt[0]
         self.assertEqual(dt_field1.name, "void64")
         self.assertEqual(dt_field1.kind, "V")
@@ -552,6 +751,8 @@ def testCreateCompoundTypeUnicodeFields(self):
         self.assertEqual(dt.kind, "V")
         self.assertEqual(len(dt.fields), 3)
         self.assertEqual(typeSize, 10)
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertFalse(isVlen(dt))
 
     def testCreateArrayType(self):
         typeItem = {"class": "H5T_ARRAY", "base": "H5T_STD_I64LE", "dims": (3, 5)}
@@ -559,16 +760,46 @@ def testCreateArrayType(self):
         dt = hdf5dtype.createDataType(typeItem)
         self.assertEqual(dt.name, "void960")
         self.assertEqual(dt.kind, "V")
+        self.assertEqual(dt.base.kind, "i")
         self.assertEqual(typeSize, 120)
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertFalse(isVlen(dt))
+
+    def testCreateCompoundArrayVlenType(self):
+        typeItem = {
+            "fields": [
+                {"type": {"class": "H5T_INTEGER", "base": "H5T_STD_U64BE"}, "name": "VALUE"},
+                {"type": {"class": "H5T_FLOAT", "base": "H5T_IEEE_F64BE"}, "name": "VALUE2"},
+                {"type": {"class": "H5T_ARRAY", "dims": [8],
+                          "base": {
+                              "class": "H5T_STRING",
+                              "charSet": "H5T_CSET_ASCII",
+                              "strPad": "H5T_STR_NULLTERM",
+                              "length": "H5T_VARIABLE"
+                            }  # noqa: E126
+                          },
+                 "name": "VALUE3"}
+                ],  # noqa: E123
+            "class": "H5T_COMPOUND"
+        }
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        dt = hdf5dtype.createDataType(typeItem)
+        self.assertEqual(dt.name, "void640")
+        self.assertEqual(dt.kind, "V")
+        self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertTrue(isVlen(dt))
+        dt_arr = dt["VALUE3"]
+        self.assertEqual(dt_arr.kind, "V")
+        self.assertEqual(dt_arr.shape, (8,))
+        self.assertEqual(dt_arr.metadata, None)
 
     def testCreateArrayIntegerType(self):
         typeItem = {"class": "H5T_INTEGER", "base": "H5T_STD_I64LE", "dims": (3, 5)}
 
         try:
             hdf5dtype.createDataType(typeItem)
-            self.assertTrue(
-                False
-            )  # expected exception - dims used with none array type
+            self.assertTrue(False)  # expected exception - dims used with non-array type
         except TypeError:
             pass  # should get exception
 
@@ -581,6 +812,7 @@ def testCreateVlenObjRefType(self):
         self.assertEqual(dt.name, "object")
         self.assertEqual(dt.kind, "O")
         self.assertTrue(check_dtype(ref=dt) is None)
+        self.assertTrue(isVlen(dt))
         dt_base = check_dtype(vlen=dt)
         self.assertTrue(dt_base is not None)
         self.assertTrue(check_dtype(ref=dt_base) is Reference)
@@ -611,6 +843,45 @@ def testCreateCompoundArrayType(self):
         self.assertTrue("a" in dt.fields.keys())
         self.assertTrue("b" in dt.fields.keys())
         self.assertEqual(typeSize, 11)
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
+        self.assertFalse(isVlen(dt))
+
+    def testCompoundArrayType(self):
+        typeItem = {
+            "class": "H5T_COMPOUND",
+            "fields": [
+                {
+                    "type": {"class": "H5T_INTEGER", "base": "H5T_STD_U64BE"},
+                    "name": "VALUE1",
+                },
+                {
+                    "type": {"class": "H5T_FLOAT", "base": "H5T_IEEE_F64BE"},
+                    "name": "VALUE2",
+                },
+                {
+                    "type": {
+                        "class": "H5T_ARRAY",
+                        "dims": [2],
+                        "base": {
+                            "class": "H5T_STRING",
+                            "charSet": "H5T_CSET_ASCII",
+                            "strPad": "H5T_STR_NULLTERM",
+                            "length": "H5T_VARIABLE",
+                        },
+                    },
+                    "name": "VALUE3",
+                },
+            ],
+        }
+        dt = hdf5dtype.createDataType(typeItem)
+        typeSize = hdf5dtype.getItemSize(typeItem)
+        self.assertEqual(typeSize, "H5T_VARIABLE")
+        self.assertTrue(isVlen(dt))
+        self.assertEqual(len(dt), 3)
+        self.assertTrue("VALUE1" in dt.fields.keys())
+        self.assertTrue("VALUE2" in dt.fields.keys())
+        self.assertTrue("VALUE3" in dt.fields.keys())
+        self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt))
 
 
 if __name__ == "__main__":
diff --git a/test/unit/hsds_reader_test.py b/test/unit/hsds_reader_test.py
new file mode 100644
index 0000000..72cf601
--- /dev/null
+++ b/test/unit/hsds_reader_test.py
@@ -0,0 +1,109 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import logging
+import numpy as np
+from h5json import Hdf5db
+from h5json.hsdsstore.hsds_reader import HSDSReader
+from h5json import selections
+
+
+class HSDSReaderTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(HSDSReaderTest, self).__init__(*args, **kwargs)
+        # main
+
+        self.log = logging.getLogger()
+        if len(self.log.handlers) > 0:
+            lhStdout = self.log.handlers[0]  # stdout is the only handler initially
+        else:
+            lhStdout = None
+
+        self.log.setLevel(logging.DEBUG)
+        handler = logging.FileHandler("./hsds_reader_test.log")
+        # add handler to logger
+        self.log.addHandler(handler)
+
+        if lhStdout is not None:
+            self.log.removeHandler(lhStdout)
+
+    def testSimple(self):
+        filepath = "/home/test_user1/test/tall.h5"
+        kwargs = {"app_logger": self.log}
+        with Hdf5db(**kwargs) as db:
+            hsds_reader = HSDSReader(filepath, **kwargs)
+            db.reader = hsds_reader
+            root_id = db.getObjectIdByPath("/")
+            root_json = db.getObjectById(root_id)
+
+            root_attrs = root_json["attributes"]
+            self.assertEqual(len(root_attrs), 2)
+            self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])
+            root_links = root_json["links"]
+            self.assertEqual(len(root_links), 2)
+            self.assertEqual(list(root_links.keys()), ["g1", "g2"])
+            g1_link = root_links["g1"]
+            self.assertEqual(g1_link["class"], "H5L_TYPE_HARD")
+            g1_id = g1_link["id"]
+            self.assertEqual(g1_id, db.getObjectIdByPath("/g1/"))
+            dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
+            dset_json = db.getObjectById(dset111_id)
+            dset_type = dset_json["type"]
+            self.assertEqual(dset_type["class"], "H5T_INTEGER")
+            self.assertEqual(dset_type["base"], "H5T_STD_I32BE")
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 2)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2"])
+            dset_shape = dset_json["shape"]
+            self.assertEqual(dset_shape["class"], "H5S_SIMPLE")
+            self.assertEqual(dset_shape["dims"], [10, 10])
+
+            # got the 5th row of the dataset
+            sel_row = selections.select((10, 10), (5, slice(0, 10)))
+            row = db.getDatasetValues(dset111_id, sel_row)
+            self.assertTrue(isinstance(row, np.ndarray))
+            self.assertEqual(row.shape, (10,))
+            for i in range(10):
+                v = row[i]
+                self.assertEqual(v, i * 5)
+
+            sel_all = selections.select((10, 10), ...)
+            arr = db.getDatasetValues(dset111_id, sel_all)
+            self.assertTrue(isinstance(arr, np.ndarray))
+            self.assertEqual(arr.shape, (10, 10))
+            for i in range(10):
+                for j in range(10):
+                    v = arr[i, j]
+                    self.assertEqual(v, i * j)
+
+            # try adding an attribute
+            db.createAttribute(dset111_id, "attr3", value=42)
+            dset_json = db.getObjectById(dset111_id)
+            dset_attrs = dset_json["attributes"]
+            self.assertEqual(len(dset_attrs), 3)
+            self.assertEqual(list(dset_attrs.keys()), ["attr1", "attr2", "attr3"])
+            attr3_json = dset_attrs["attr3"]
+            attr3_shape = attr3_json["shape"]
+            self.assertEqual(attr3_shape["class"], "H5S_SCALAR")
+            attr3_type = attr3_json["type"]
+            self.assertEqual(attr3_type["class"], "H5T_INTEGER")
+            self.assertEqual(attr3_type["base"], "H5T_STD_I64LE")
+            attr3_value = attr3_json["value"]
+            self.assertEqual(attr3_value, 42)
+
+            db.close()
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/hsds_writer_test.py b/test/unit/hsds_writer_test.py
new file mode 100644
index 0000000..a3ba9be
--- /dev/null
+++ b/test/unit/hsds_writer_test.py
@@ -0,0 +1,82 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and      #
+# Utilities.  The full HDF5 REST Server copyright notice, including          #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+import time
+import logging
+import h5py
+import numpy as np
+from h5json import Hdf5db
+from h5json.hsdsstore.hsds_writer import HSDSWriter
+from h5json.hdf5dtype import special_dtype, Reference
+from h5json import selections
+
+
+class HSDSWriterTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(HSDSWriterTest, self).__init__(*args, **kwargs)
+        # main
+
+        # create logger
+        logfname = "hsds_writer_test.log"
+        loglevel = logging.DEBUG
+        logging.basicConfig(filename=logfname, format='%(levelname)s %(asctime)s %(message)s', level=loglevel)
+        self.log = logging.getLogger()
+        self.log.info("init!")
+
+    def testSimple(self):
+
+        filepath = "/home/test_user1/writer_test.h5"
+        db = Hdf5db(app_logger=self.log)
+        db.writer = HSDSWriter(filepath)
+        root_id = db.open()
+        print("root_id:", root_id)
+        db.createAttribute(root_id, "attr1", value=[1, 2, 3, 4])
+        db.createAttribute(root_id, "attr2", 42)
+        g1_id = db.createGroup()
+        db.createHardLink(root_id, "g1", g1_id)
+        db.createAttribute(g1_id, "a1", "hello")
+        g2_id = db.createGroup()
+        db.createHardLink(root_id, "g2", g2_id)
+
+        g1_1_id = db.createGroup()
+        db.createHardLink(g1_id, "g1.1", g1_1_id)
+        dset_111_id = db.createDataset(shape=(10, 10), dtype=np.int32)
+        arr = np.zeros((10, 10), dtype=np.int32)
+        for i in range(10):
+            for j in range(10):
+                arr[i, j] = i * j
+        sel_all = selections.select((10, 10), ...)
+        db.setDatasetValues(dset_111_id, sel_all, arr)
+        db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id)
+        db.createSoftLink(g2_id, "slink", "somewhere")
+        db.createExternalLink(g2_id, "extlink", "somewhere", "someplace")
+        db.createCustomLink(g2_id, "cust", {"foo": "bar"})
+        db.flush()
+
+        db.createAttribute(g1_id, "a2", "bye-bye")
+        db.flush()
+
+        g21 = db.createGroup()
+        db.createHardLink(g2_id, "g2.1", g21)
+        db.flush()
+
+        sel = selections.select((10, 10), (slice(4, 5), slice(4, 5)))
+        arr = np.zeros((), dtype=np.int32)
+        arr[()] = 42
+        db.setDatasetValues(dset_111_id, sel, arr)
+        db.close()
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/test/unit/objid_test.py b/test/unit/objid_test.py
new file mode 100755
index 0000000..d74ec10
--- /dev/null
+++ b/test/unit/objid_test.py
@@ -0,0 +1,211 @@
+##############################################################################
+# Copyright by The HDF Group.                                                #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and      #
+# Utilities.  The full HSDS copyright notice, including                      #
+# terms governing use, modification, and redistribution, is contained in     #
+# the file COPYING, which can be found at the root of the source code        #
+# distribution tree.  If you do not have access to this file, you may        #
+# request a copy from help@hdfgroup.org.                                     #
+##############################################################################
+import unittest
+
+from h5json.objid import isRootObjId, isValidUuid, validateUuid
+from h5json.objid import createObjId, getCollectionForId, getUuidFromId
+from h5json.objid import isObjId, isS3ObjKey, getS3Key, getObjId, isSchema2Id
+
+
+class IdUtilTest(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(IdUtilTest, self).__init__(*args, **kwargs)
+        # main
+
+    def testCreateObjId(self):
+        id_len = 38  # 36 for uuid plus two for prefix ("g-", "d-")
+        ids = set()  # we'll use this to verify we always get a unique id
+        # create just a plain uuid...
+        id = createObjId()
+        self.assertEqual(len(id) + 2, id_len)
+        # create a v2 root_id
+        root_id = createObjId(obj_type="groups")
+        self.assertEqual(len(root_id), id_len)
+        for obj_type in ("groups", "datasets", "datatypes", "chunks"):
+            for i in range(100):
+                id = createObjId(obj_type=obj_type, root_id=root_id)
+                self.assertEqual(len(id), id_len)
+                self.assertTrue(id[0] in ("g", "d", "t", "c"))
+                self.assertEqual(id[1], "-")
+                ids.add(id)
+
+        self.assertEqual(len(ids), 400)
+        try:
+            createObjId(obj_type="bad_class")
+            self.assertTrue(False)  # should throw exception
+        except ValueError:
+            pass  # expected
+
+    def testIsValidUuid(self):
+        group1_id = "g-314d61b8-9954-11e6-a733-3c15c2da029e"      # orig schema
+        group2_id = "g-314d61b8-995411e6-a733-3c15c2-da029e"
+        root_id = "g-f9aaa28e-d42e10e5-7122-2a065c-a6986d"
+        dataset1_id = "d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e"    # orig schema
+        dataset2_id = "d-4c48f3ae-995411e6-a3cd-3c15c2-da029e"
+        ctype1_id = "t-8c785f1c-9953-11e6-9bc2-0242ac110005"      # orig schema
+        ctype2_id = "t-8c785f1c-995311e6-9bc2-0242ac-110005"
+        chunk1_id = "c-8c785f1c-9953-11e6-9bc2-0242ac110005_7_2"  # orig schema
+        chunk2_id = "c-8c785f1c-995311e6-9bc2-0242ac-110005_7_2"
+        domain_id = "mybucket/bob/mydata.h5"
+        s3_domain_id = "s3://mybucket/bob/mydata.h5"
+        file_domain_id = "file://mybucket/bob/mydata.h5"
+        azure_domain_id = "https://myaccount.blob.core.windows.net/mybucket/bob/mydata.h5"
+        valid_id_map = {
+            group1_id: "a49be-g-314d61b8-9954-11e6-a733-3c15c2da029e",
+            group2_id: "db/314d61b8-995411e6/g/a733-3c15c2-da029e/.group.json",
+            dataset1_id: "26928-d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e",
+            dataset2_id: "db/4c48f3ae-995411e6/d/a3cd-3c15c2-da029e/.dataset.json",
+            ctype1_id: "5a9cf-t-8c785f1c-9953-11e6-9bc2-0242ac110005",
+            ctype2_id: "db/8c785f1c-995311e6/t/9bc2-0242ac-110005/.datatype.json",
+            chunk1_id: "dc4ce-c-8c785f1c-9953-11e6-9bc2-0242ac110005_7_2",
+            chunk2_id: "db/8c785f1c-995311e6/d/9bc2-0242ac-110005/7_2",
+            domain_id: "bob/mydata.h5/.domain.json",
+            s3_domain_id: "bob/mydata.h5/.domain.json",
+            file_domain_id: "bob/mydata.h5/.domain.json",
+            azure_domain_id: "bob/mydata.h5/.domain.json", }
+
+        bad_ids = ("g-1e76d862", "/bob/mydata.h5")
+
+        self.assertTrue(isValidUuid(group1_id))
+        self.assertFalse(isSchema2Id(group1_id))
+        self.assertTrue(isValidUuid(group1_id, obj_class="Group"))
+        self.assertTrue(isValidUuid(group1_id, obj_class="group"))
+        self.assertTrue(isValidUuid(group1_id, obj_class="groups"))
+        self.assertTrue(isSchema2Id(root_id))
+        self.assertTrue(isValidUuid(root_id, obj_class="Group"))
+        self.assertTrue(isValidUuid(root_id, obj_class="group"))
+        self.assertTrue(isValidUuid(root_id, obj_class="groups"))
+        self.assertTrue(isRootObjId(root_id))
+        self.assertTrue(isValidUuid(dataset1_id, obj_class="datasets"))
+        self.assertFalse(isSchema2Id(dataset1_id))
+        self.assertTrue(isValidUuid(ctype1_id, obj_class="datatypes"))
+        self.assertFalse(isSchema2Id(ctype1_id))
+        self.assertTrue(isValidUuid(chunk1_id, obj_class="chunks"))
+        self.assertFalse(isSchema2Id(chunk1_id))
+        self.assertTrue(isValidUuid(group2_id))
+        self.assertTrue(isSchema2Id(group2_id))
+        self.assertTrue(isValidUuid(group2_id, obj_class="Group"))
+        self.assertTrue(isValidUuid(group2_id, obj_class="group"))
+        self.assertTrue(isValidUuid(group2_id, obj_class="groups"))
+        self.assertFalse(isRootObjId(group2_id))
+        self.assertTrue(isValidUuid(dataset2_id, obj_class="datasets"))
+        self.assertTrue(isSchema2Id(dataset2_id))
+        self.assertTrue(isValidUuid(ctype2_id, obj_class="datatypes"))
+        self.assertTrue(isSchema2Id(ctype2_id))
+        self.assertTrue(isValidUuid(chunk2_id, obj_class="chunks"))
+        self.assertTrue(isSchema2Id(chunk2_id))
+        validateUuid(group1_id)
+        try:
+            isRootObjId(group1_id)
+            self.assertTrue(False)
+        except ValueError:
+            # only works for v2 schema
+            pass  # expected
+
+        for item in valid_id_map:
+            self.assertTrue(isObjId(item))
+            s3key = getS3Key(item)
+            self.assertTrue(s3key[0] != "/")
+            self.assertTrue(isS3ObjKey(s3key))
+            expected = valid_id_map[item]
+            self.assertEqual(s3key, expected)
+            if item.find("/") > 0:
+                continue  # bucket name gets lost when domain ids get converted to s3keys
+            objid = getObjId(s3key)
+            self.assertEqual(objid, item)
+        for item in bad_ids:
+            self.assertFalse(isValidUuid(item))
+            self.assertFalse(isObjId(item))
+
+    def testGetCollection(self):
+        group_id = "g-314d61b8-9954-11e6-a733-3c15c2da029e"
+        dataset_id = "d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e"
+        ctype_id = "t-8c785f1c-9953-11e6-9bc2-0242ac110005"
+        bad_id = "x-59647858-9954-11e6-95d2-3c15c2da029e"
+        self.assertEqual(getCollectionForId(group_id), "groups")
+        self.assertEqual(getCollectionForId(dataset_id), "datasets")
+        self.assertEqual(getCollectionForId(ctype_id), "datatypes")
+        self.assertEqual(getUuidFromId(group_id), "314d61b8-9954-11e6-a733-3c15c2da029e")
+        try:
+            getCollectionForId(bad_id)
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+        try:
+            getCollectionForId(None)
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+
+    def testSchema2Id(self):
+        root_id = createObjId("groups")
+        group_id = createObjId("groups", root_id=root_id)
+        dataset_id = createObjId("datasets", root_id=root_id)
+        ctype_id = createObjId("datatypes", root_id=root_id)
+
+        self.assertEqual(getCollectionForId(root_id), "groups")
+        self.assertEqual(getCollectionForId(group_id), "groups")
+        self.assertEqual(getCollectionForId(dataset_id), "datasets")
+        self.assertEqual(getCollectionForId(ctype_id), "datatypes")
+        chunk_id = "c" + dataset_id[1:] + "_1_2"
+        chunk_partition_id = "c42-" + dataset_id[2:] + "_1_2"
+
+        for id in (chunk_id, chunk_partition_id):
+            try:
+                getCollectionForId(id)
+                self.assertTrue(False)
+            except ValueError:
+                pass  # expected
+        valid_ids = (
+            group_id,
+            dataset_id,
+            ctype_id,
+            chunk_id,
+            chunk_partition_id,
+            root_id,
+        )
+        s3prefix = getS3Key(root_id)
+        self.assertTrue(s3prefix.endswith("/.group.json"))
+        s3prefix = s3prefix[: -(len(".group.json"))]
+        for oid in valid_ids:
+            self.assertTrue(len(oid) >= 38)
+            parts = oid.split("-")
+            self.assertEqual(len(parts), 6)
+            self.assertTrue(oid[0] in ("g", "d", "t", "c"))
+            self.assertTrue(isSchema2Id(oid))
+            if oid == root_id:
+                self.assertTrue(isRootObjId(oid))
+            else:
+                self.assertFalse(isRootObjId(oid))
+
+            s3key = getS3Key(oid)
+            self.assertTrue(s3key.startswith(s3prefix))
+            self.assertEqual(getObjId(s3key), oid)
+            self.assertTrue(isS3ObjKey(s3key))
+
+    def testGetDataTypeId(self):
+        test_uuid = "9b652223-83f8-11e5-b028-3c15c2da029e"
+        test_ids = (
+            "datatypes/9b652223-83f8-11e5-b028-3c15c2da029e",
+            "datatypes/t-9b652223-83f8-11e5-b028-3c15c2da029e",
+            "t-9b652223-83f8-11e5-b028-3c15c2da029e"
+        )
+        for test_id in test_ids:
+            self.assertTrue(isValidUuid(test_id))
+            self.assertEqual(getCollectionForId(test_id), "datatypes")
+            self.assertEqual(getUuidFromId(test_id), test_uuid)
+
+
+if __name__ == "__main__":
+    # setup test files
+
+    unittest.main()
diff --git a/testall.py b/testall.py
index 8e5d041..45e0610 100755
--- a/testall.py
+++ b/testall.py
@@ -15,7 +15,28 @@
 import shutil
 import h5py
 
-unit_tests = ("hdf5dtype_test", "hdf5db_test")
+unit_tests = [
+    "array_util_test",
+    "objid_test",
+    "hdf5dtype_test",
+    "hdf5db_test",
+    "h5json_reader_test",
+    "h5json_writer_test",
+    "h5py_reader_test",
+    "h5py_writer_test",
+]
+
+use_hsds = True
+for key in ("HS_ENDPOINT", "HS_USERNAME", "HS_PASSWORD"):
+    if key not in os.environ:
+        use_hsds = False
+        print(f"not including HSDS tests, no {key} environment set")
+        break
+
+if use_hsds:
+    unit_tests.append("hsds_reader_test")
+unit_tests = tuple(unit_tests)
+
 integ_tests = ("h5tojson_test", "jsontoh5_test")
 
 # verify the hdf5 lib version is recent
@@ -28,6 +49,9 @@
     print(h5py.version.info)
     sys.exit("Need h5py version 3.0 or later")
 
+if not os.path.isdir("./test/unit/out"):
+    os.makedirs("test/unit/out")
+
 # Run all hdf5-json tests
 # Run this script before running any integ tests
 for file_name in unit_tests:
@@ -39,6 +63,13 @@
 os.remove("hdf5dbtest.log")
 
 os.chdir("test/integ")
+
+if not os.path.isdir("./h5_out"):
+    os.makedirs("h5_out")
+
+if not os.path.isdir("./json_out"):
+    os.makedirs("json_out")
+
 for file_name in integ_tests:
     print(file_name)
     rc = os.system("python " + file_name + ".py")