Skip to content

Commit f326072

Browse files
committed
id_table: use smaller hash without 'use' field
Use the classic tombstone technique. Validate table overflow on insertion by counting free slots. This is not an exact check, so it could lead to size explosion.
1 parent 61f9f27 commit f326072

File tree

2 files changed

+245
-2
lines changed

2 files changed

+245
-2
lines changed

id_table.c

Lines changed: 244 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
* hash
2525
* 21: funny falcon's Coalesced Hashing implementation [Feature #6962]
2626
* 22: simple open addressing with quadratic probing.
27+
* 23: open addressing with quadratic probing without tombstone count.
2728
* mix (list + hash)
2829
* 31: list(12) (capa <= 32) + hash(22)
2930
* 32: list(14) (capa <= 32) + hash(22)
@@ -32,7 +33,7 @@
3233
*/
3334

3435
#ifndef ID_TABLE_IMPL
35-
#define ID_TABLE_IMPL 31
36+
#define ID_TABLE_IMPL 23
3637
#endif
3738

3839
#if ID_TABLE_IMPL == 0
@@ -106,6 +107,13 @@
106107
#define ID_TABLE_USE_SMALL_HASH 1
107108
#define ID_TABLE_USE_ID_SERIAL 1
108109

110+
#elif ID_TABLE_IMPL == 23
111+
#define ID_TABLE_NAME hash
112+
#define ID_TABLE_IMPL_TYPE struct hash_id_table
113+
114+
#define ID_TABLE_USE_SMALLER_HASH 1
115+
#define ID_TABLE_USE_ID_SERIAL 1
116+
109117
#elif ID_TABLE_IMPL == 31
110118
#define ID_TABLE_NAME mix
111119
#define ID_TABLE_IMPL_TYPE struct mix_id_table
@@ -1359,6 +1367,241 @@ hash_id_table_foreach_values(struct hash_id_table *tbl, enum rb_id_table_iterato
13591367
}
13601368
#endif /* ID_TABLE_USE_SMALL_HASH */
13611369

1370+
#if ID_TABLE_USE_SMALLER_HASH
1371+
#define HASH_MIN_CAPA 4
1372+
1373+
struct hash_id_table {
1374+
int capa;
1375+
int num;
1376+
id_key_t *keys;
1377+
};
1378+
#define TABLE_VALUES(tbl) ((VALUE *)((tbl)->keys + (tbl)->capa))
1379+
static struct hash_id_table *
1380+
hash_id_table_init(struct hash_id_table *tbl, size_t capa)
1381+
{
1382+
if (capa > 0) {
1383+
tbl->capa = (int)capa;
1384+
tbl->keys = (id_key_t *)xmalloc(sizeof(id_key_t) * capa + sizeof(VALUE) * capa);
1385+
}
1386+
return tbl;
1387+
}
1388+
1389+
static struct hash_id_table *
1390+
hash_id_table_create(size_t capa)
1391+
{
1392+
struct hash_id_table *tbl = ZALLOC(struct hash_id_table);
1393+
return hash_id_table_init(tbl, capa);
1394+
}
1395+
1396+
static void
1397+
hash_id_table_free(struct hash_id_table *tbl)
1398+
{
1399+
xfree(tbl->keys);
1400+
xfree(tbl);
1401+
}
1402+
1403+
static void
1404+
hash_id_table_clear(struct hash_id_table *tbl)
1405+
{
1406+
xfree(tbl->keys);
1407+
memset(tbl, 0, sizeof(*tbl));
1408+
}
1409+
1410+
static size_t
1411+
hash_id_table_size(struct hash_id_table *tbl)
1412+
{
1413+
return (size_t)tbl->num;
1414+
}
1415+
1416+
static size_t
1417+
hash_id_table_memsize(struct hash_id_table *tbl)
1418+
{
1419+
return (sizeof(id_key_t) + sizeof(VALUE)) * tbl->capa + sizeof(struct hash_id_table);
1420+
}
1421+
1422+
static void
1423+
hash_table_add(struct hash_id_table *tbl, id_key_t key, VALUE val)
1424+
{
1425+
id_key_t *keys = tbl->keys;
1426+
int mask = tbl->capa - 1;
1427+
int pos = key & mask;
1428+
int d = 1;
1429+
while (keys[pos]) {
1430+
pos = (pos + d) & mask;
1431+
d++;
1432+
}
1433+
keys[pos] = key;
1434+
TABLE_VALUES(tbl)[pos] = val;
1435+
tbl->num++;
1436+
}
1437+
1438+
static void
1439+
hash_table_extend(struct hash_id_table *tbl)
1440+
{
1441+
const int capa = tbl->capa == 0 ? HASH_MIN_CAPA : (tbl->capa * 2);
1442+
struct hash_id_table ttbl = {capa, 0}, tttbl;
1443+
const int size = sizeof(id_key_t) * capa + sizeof(VALUE) * capa;
1444+
int i;
1445+
ttbl.keys = (id_key_t*)xcalloc(1, size);
1446+
for (i=tbl->capa-1; i>=0;i--) {
1447+
if (tbl->keys[i] && ~tbl->keys[i]) {
1448+
hash_table_add(&ttbl, tbl->keys[i], TABLE_VALUES(tbl)[i]);
1449+
}
1450+
}
1451+
tttbl = *tbl;
1452+
*tbl = ttbl;
1453+
xfree(tttbl.keys);
1454+
}
1455+
1456+
static int
1457+
hash_table_index(struct hash_id_table *tbl, id_key_t key)
1458+
{
1459+
id_key_t *keys = tbl->keys;
1460+
int mask = tbl->capa - 1;
1461+
int pos = key & mask;
1462+
int d = 1;
1463+
if (tbl->capa == 0) {
1464+
return -1;
1465+
}
1466+
while (keys[pos] != key) {
1467+
if (!keys[pos]) return -1;
1468+
pos = (pos + d) & mask;
1469+
d++;
1470+
}
1471+
return pos;
1472+
}
1473+
1474+
static int
1475+
hash_id_table_lookup(struct hash_id_table *tbl, ID id, VALUE *valp)
1476+
{
1477+
id_key_t key = id2key(id);
1478+
int index = hash_table_index(tbl, key);
1479+
1480+
if (index >= 0) {
1481+
*valp = TABLE_VALUES(tbl)[index];
1482+
return TRUE;
1483+
}
1484+
else {
1485+
return FALSE;
1486+
}
1487+
}
1488+
1489+
static int
1490+
hash_id_table_insert(struct hash_id_table *tbl, ID id, VALUE val)
1491+
{
1492+
id_key_t key = id2key(id);
1493+
id_key_t *keys = tbl->keys;
1494+
int mask = tbl->capa - 1;
1495+
int free = 0;
1496+
int pos = key & mask;
1497+
int d = 1;
1498+
/* we should be sure that empty slot remains after insertion */
1499+
int max = tbl->capa == 4 ? 4 :
1500+
tbl->capa <= 16 ? tbl->capa / 2 : tbl->capa / 4;
1501+
int freecnt = 2;
1502+
int set = FALSE;
1503+
while (max && freecnt) {
1504+
if (keys[pos] == key) {
1505+
TABLE_VALUES(tbl)[pos] = val;
1506+
set = TRUE;
1507+
freecnt--;
1508+
}
1509+
if (!free && !(keys[pos] && ~keys[pos])) {
1510+
free = pos+1;
1511+
}
1512+
if (!keys[pos])
1513+
freecnt--;
1514+
pos = (pos + d) & mask;
1515+
d++;
1516+
max--;
1517+
}
1518+
if (!max) {
1519+
hash_table_extend(tbl);
1520+
hash_id_table_insert(tbl, id, val);
1521+
} else if (!set) {
1522+
pos = free - 1;
1523+
keys[pos] = key;
1524+
TABLE_VALUES(tbl)[pos] = val;
1525+
tbl->num++;
1526+
}
1527+
return TRUE;
1528+
}
1529+
1530+
static int
1531+
hash_delete_index(struct hash_id_table *tbl, int index)
1532+
{
1533+
if (index >= 0) {
1534+
tbl->keys[index] = ~0;
1535+
tbl->num--;
1536+
return TRUE;
1537+
} else {
1538+
return FALSE;
1539+
}
1540+
}
1541+
1542+
static int
1543+
hash_id_table_delete(struct hash_id_table *tbl, ID id)
1544+
{
1545+
const id_key_t key = id2key(id);
1546+
int index = hash_table_index(tbl, key);
1547+
return hash_delete_index(tbl, index);
1548+
}
1549+
1550+
static void
1551+
hash_id_table_foreach(struct hash_id_table *tbl, enum rb_id_table_iterator_result (*func)(ID id, VALUE val, void *data), void *data)
1552+
{
1553+
int capa = tbl->capa;
1554+
int i;
1555+
const id_key_t *keys = tbl->keys;
1556+
const VALUE *values = TABLE_VALUES(tbl);
1557+
enum rb_id_table_iterator_result ret;
1558+
1559+
for (i=0; i<capa; i++) {
1560+
const id_key_t key = keys[i];
1561+
if (key && ~key) {
1562+
ret = (*func)(key2id(key), values[i], data);
1563+
assert(key != 0);
1564+
1565+
switch (ret) {
1566+
case ID_TABLE_STOP:
1567+
return;
1568+
case ID_TABLE_DELETE:
1569+
hash_delete_index(tbl, i);
1570+
case ID_TABLE_CONTINUE:
1571+
break;
1572+
}
1573+
}
1574+
}
1575+
}
1576+
1577+
static void
1578+
hash_id_table_foreach_values(struct hash_id_table *tbl, enum rb_id_table_iterator_result (*func)(VALUE val, void *data), void *data)
1579+
{
1580+
int capa = tbl->capa;
1581+
int i;
1582+
const id_key_t *keys = tbl->keys;
1583+
VALUE *values = TABLE_VALUES(tbl);
1584+
enum rb_id_table_iterator_result ret;
1585+
1586+
for (i=0; i<capa; i++) {
1587+
const id_key_t key = keys[i];
1588+
if (key && ~key) {
1589+
ret = (*func)(values[i], data);
1590+
assert(key != 0);
1591+
1592+
switch (ret) {
1593+
case ID_TABLE_STOP:
1594+
return;
1595+
case ID_TABLE_DELETE:
1596+
hash_delete_index(tbl, i);
1597+
case ID_TABLE_CONTINUE:
1598+
break;
1599+
}
1600+
}
1601+
}
1602+
}
1603+
#endif /* ID_TABLE_USE_SMALLER_HASH */
1604+
13621605
#if ID_TABLE_USE_MIX
13631606

13641607
struct mix_id_table {

symbol.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,7 @@ next_id_base(void)
616616
{
617617
rb_id_serial_t next_serial = global_symbols.last_id + 1;
618618

619-
if (next_serial == 0) {
619+
if (next_serial == ~(rb_id_serial_t)0) {
620620
return (ID)-1;
621621
}
622622
else {

0 commit comments

Comments
 (0)