From 3fb049ae84286c9fa7b1631c534fa83f609bb887 Mon Sep 17 00:00:00 2001 From: kosbourdos Date: Fri, 17 Feb 2023 20:02:58 +0200 Subject: [PATCH 01/16] Add 'and' operator in select where statement --- mdb.py | 2 -- miniDB/database.py | 24 +++++++++++++++++++++--- miniDB/table.py | 1 + 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/mdb.py b/mdb.py index a981e5be..ba6b0aa9 100644 --- a/mdb.py +++ b/mdb.py @@ -44,9 +44,7 @@ def create_query_plan(query, keywords, action): This can and will be used recursively ''' - dic = {val: None for val in keywords if val!=';'} - ql = [val for val in query.split(' ') if val !=''] kw_in_query = [] diff --git a/miniDB/database.py b/miniDB/database.py index a3ac6be7..8613ae1b 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -354,16 +354,29 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ # print(table_name) self.load_database() - if isinstance(table_name,Table): + if isinstance(table_name, Table): return table_name._select_where(columns, condition, distinct, order_by, desc, limit) if condition is not None: - condition_column = split_condition(condition)[0] + if 'and' not in condition: + condition_column = split_condition(condition)[0] else: condition_column = '' + conditions = [] + and_flag = False + if 'and' in condition: + and_flag = True + for cond in condition.split('and'): + conditions.append(cond) + else: + print('or pending..') + + # self.lock_table(table_name, mode='x') + + if self.is_locked(table_name): return if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]: @@ -371,7 +384,12 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ bt = self._load_idx(index_name) table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) else: - table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) + if and_flag == False: + table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) + else: + table = self.tables[table_name]._select_where(columns, conditions[0], distinct, order_by, desc, limit) + for cond in conditions[1:]: + table = table._select_where(columns, cond, distinct, order_by, desc, limit) # self.unlock_table(table_name) if save_as is not None: table._name = save_as diff --git a/miniDB/table.py b/miniDB/table.py index f5c7d937..0757f84a 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -236,6 +236,7 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by column_name, operator, value = self._parse_condition(condition) column = self.column_by_name(column_name) rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + print(rows) else: rows = [i for i in range(len(self.data))] From 2f2dda10bc9cd475cb8398656628ea19a61e97b9 Mon Sep 17 00:00:00 2001 From: kosbourdos Date: Fri, 17 Feb 2023 21:00:58 +0200 Subject: [PATCH 02/16] Add 'and' operator in delete_from statement --- miniDB/database.py | 23 +++++++++++++++-------- miniDB/table.py | 23 ++++++++++++++++------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index 8613ae1b..46b83fbe 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -319,7 +319,14 @@ def delete_from(self, table_name, condition): Operatores supported: (<,<=,==,>=,>) ''' self.load_database() - + + conditions = [] + if 'and' in condition: + for cond in condition.split('and'): + conditions.append(cond) + + + lock_ownership = self.lock_table(table_name, mode='x') deleted = self.tables[table_name]._delete_where(condition) if lock_ownership: @@ -363,14 +370,14 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ else: condition_column = '' - conditions = [] and_flag = False - if 'and' in condition: - and_flag = True - for cond in condition.split('and'): - conditions.append(cond) - else: - print('or pending..') + if condition is not None: + conditions = [] + if 'and' in condition: + and_flag = True + for cond in condition.split('and'): + conditions.append(cond) + diff --git a/miniDB/table.py b/miniDB/table.py index 0757f84a..d9933200 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -182,14 +182,24 @@ def _delete_where(self, condition): Operatores supported: (<,<=,==,>=,>) ''' - column_name, operator, value = self._parse_condition(condition) + lists_of_indexes = [] + for cond in condition.split('and'): + indexes = [] + column_name, operator, value = self._parse_condition(cond) - indexes_to_del = [] + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + indexes.append(index) + + lists_of_indexes.append(indexes) + + intersection_set = set(lists_of_indexes[0]) + for l in lists_of_indexes[1:]: + intersection_set = intersection_set.intersection(l) + + indexes_to_del = list(intersection_set) - column = self.column_by_name(column_name) - for index, row_value in enumerate(column): - if get_op(operator, row_value, value): - indexes_to_del.append(index) # we pop from highest to lowest index in order to avoid removing the wrong item # since we dont delete, we dont have to to pop in that order, but since delete is used @@ -236,7 +246,6 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by column_name, operator, value = self._parse_condition(condition) column = self.column_by_name(column_name) rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] - print(rows) else: rows = [i for i in range(len(self.data))] From 2e93704dc7b1b9e18a480f8b7e58d7cb46403c38 Mon Sep 17 00:00:00 2001 From: kosbourdos Date: Sat, 18 Feb 2023 18:41:03 +0200 Subject: [PATCH 03/16] Add 'and' operator in delete_from statement --- miniDB/database.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index 46b83fbe..109662a0 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -378,12 +378,6 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ for cond in condition.split('and'): conditions.append(cond) - - - - # self.lock_table(table_name, mode='x') - - if self.is_locked(table_name): return if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]: From 28b3385f9a1d31edad930288ae1e06433442ef90 Mon Sep 17 00:00:00 2001 From: gk Date: Sat, 18 Feb 2023 20:38:53 +0200 Subject: [PATCH 04/16] Added or func in select_where --- miniDB/database.py | 11 +++++++---- miniDB/table.py | 11 ++++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index 109662a0..1cf6e877 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -366,11 +366,13 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ if condition is not None: if 'and' not in condition: - condition_column = split_condition(condition)[0] + if 'or' not in condition: + condition_column = split_condition(condition)[0] else: condition_column = '' and_flag = False + # or_flag = False if condition is not None: conditions = [] if 'and' in condition: @@ -385,12 +387,13 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ bt = self._load_idx(index_name) table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) else: - if and_flag == False: - table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) - else: + if and_flag == True: table = self.tables[table_name]._select_where(columns, conditions[0], distinct, order_by, desc, limit) for cond in conditions[1:]: table = table._select_where(columns, cond, distinct, order_by, desc, limit) + else: + table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) + # self.unlock_table(table_name) if save_as is not None: table._name = save_as diff --git a/miniDB/table.py b/miniDB/table.py index d9933200..05d921bc 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -243,9 +243,14 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # if condition is None, return all rows # if not, return the rows with values where condition is met for value if condition is not None: - column_name, operator, value = self._parse_condition(condition) - column = self.column_by_name(column_name) - rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + list_of_indexes = [] + for cond in condition.split(' or '): + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + for idx in rows: + list_of_indexes.append(idx) + rows = list(set(list_of_indexes)) else: rows = [i for i in range(len(self.data))] From 330edd8dd7153ec7de5e4944eb5ecb9eb24ffa6d Mon Sep 17 00:00:00 2001 From: kosbourdos Date: Sat, 18 Feb 2023 20:41:33 +0200 Subject: [PATCH 05/16] add 'NOT' func in select statement --- miniDB/database.py | 32 ++++++++++++++++++++++++-------- miniDB/misc.py | 33 ++++++++++++++++++++++++++------- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index 1cf6e877..2bec63a8 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -13,7 +13,7 @@ from joins import Inlj, Smj from btree import Btree -from misc import split_condition +from misc import split_condition, not_op from table import Table @@ -365,20 +365,34 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ return table_name._select_where(columns, condition, distinct, order_by, desc, limit) if condition is not None: - if 'and' not in condition: - if 'or' not in condition: - condition_column = split_condition(condition)[0] + if 'and' not in condition and 'or' not in condition: + condition_column = split_condition(condition)[0] else: condition_column = '' and_flag = False - # or_flag = False + not_flag = False if condition is not None: conditions = [] if 'and' in condition: and_flag = True for cond in condition.split('and'): - conditions.append(cond) + if 'not' in cond: + cond = cond.split('not')[1] + left, op, right = split_condition(cond) + op = not_op(op) + not_cond = left + op + right + conditions.append(not_cond) + else: + if 'not' in condition: + not_flag = True + without_non = condition.split('not')[1] + left, op, right = split_condition(without_non) + op = not_op(op) + condition_after_not = left + op + right + + + if self.is_locked(table_name): return @@ -387,13 +401,15 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ bt = self._load_idx(index_name) table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) else: - if and_flag == True: + if not_flag: + table = self.tables[table_name]._select_where(columns, condition_after_not, distinct, order_by, desc, limit) + elif and_flag: table = self.tables[table_name]._select_where(columns, conditions[0], distinct, order_by, desc, limit) for cond in conditions[1:]: table = table._select_where(columns, cond, distinct, order_by, desc, limit) else: table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) - + # self.unlock_table(table_name) if save_as is not None: table._name = save_as diff --git a/miniDB/misc.py b/miniDB/misc.py index aefada74..46d9fa96 100644 --- a/miniDB/misc.py +++ b/miniDB/misc.py @@ -1,30 +1,34 @@ import operator + def get_op(op, a, b): ''' Get op as a function of a and b by using a symbol ''' ops = {'>': operator.gt, - '<': operator.lt, - '>=': operator.ge, - '<=': operator.le, - '=': operator.eq} + '<': operator.lt, + '>=': operator.ge, + '<=': operator.le, + '=': operator.eq, + '<>': operator.ne} try: - return ops[op](a,b) + return ops[op](a, b) except TypeError: # if a or b is None (deleted record), python3 raises typerror return False + def split_condition(condition): ops = {'>=': operator.ge, '<=': operator.le, + '<>': operator.ne, '=': operator.eq, '>': operator.gt, '<': operator.lt} for op_key in ops.keys(): - splt=condition.split(op_key) - if len(splt)>1: + splt = condition.split(op_key) + if len(splt) > 1: left, right = splt[0].strip(), splt[1].strip() if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them. @@ -48,3 +52,18 @@ def reverse_op(op): '<=' : '>=', '=' : '=' }.get(op) + + +def not_op(op): + """ + Reverse operator for 'NOT' + """ + return { + '>': '<', + '>=': '<', + '<=': ' >', + '=': '<>', + '<>': '='}.get(op) + + + From 04207dd00d988070e19559002d7df2db49fd6ee5 Mon Sep 17 00:00:00 2001 From: kosbourdos Date: Sat, 18 Feb 2023 21:38:23 +0200 Subject: [PATCH 06/16] 'NOT', 'OR', 'AND' and multiple conditions full functional on select --- miniDB/database.py | 13 +++++++++---- miniDB/misc.py | 1 + miniDB/table.py | 14 +++++++++++--- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index 2bec63a8..5ed2fbef 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -376,15 +376,17 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ conditions = [] if 'and' in condition: and_flag = True - for cond in condition.split('and'): - if 'not' in cond: - cond = cond.split('not')[1] + for cond in condition.split(' and '): + if 'not ' in cond: + cond = cond.split('not ')[1] left, op, right = split_condition(cond) op = not_op(op) not_cond = left + op + right conditions.append(not_cond) + else: + conditions.append(cond) else: - if 'not' in condition: + if 'and' not in condition and 'or' not in condition and 'not' in condition: not_flag = True without_non = condition.split('not')[1] left, op, right = split_condition(without_non) @@ -394,6 +396,9 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ + + + if self.is_locked(table_name): return if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]: diff --git a/miniDB/misc.py b/miniDB/misc.py index 46d9fa96..fe758adb 100644 --- a/miniDB/misc.py +++ b/miniDB/misc.py @@ -60,6 +60,7 @@ def not_op(op): """ return { '>': '<', + '<': ' >', '>=': '<', '<=': ' >', '=': '<>', diff --git a/miniDB/table.py b/miniDB/table.py index 05d921bc..d25bac0b 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -6,7 +6,7 @@ sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB') -from misc import get_op, split_condition +from misc import get_op, split_condition, not_op class Table: @@ -245,8 +245,16 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by if condition is not None: list_of_indexes = [] for cond in condition.split(' or '): - column_name, operator, value = self._parse_condition(cond) - column = self.column_by_name(column_name) + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + else: + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] for idx in rows: list_of_indexes.append(idx) From b016d15db2f2c911e7727259bb9a8ae7ed1d5102 Mon Sep 17 00:00:00 2001 From: kosbourdos Date: Sat, 18 Feb 2023 22:41:49 +0200 Subject: [PATCH 07/16] start dynamic 'AND' and 'OR' index parsing on table.py for multiple conditions --- miniDB/database.py | 36 +----------------------------------- miniDB/table.py | 33 +++++++++++++++++++-------------- 2 files changed, 20 insertions(+), 49 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index 5ed2fbef..3cab8cce 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -370,33 +370,6 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ else: condition_column = '' - and_flag = False - not_flag = False - if condition is not None: - conditions = [] - if 'and' in condition: - and_flag = True - for cond in condition.split(' and '): - if 'not ' in cond: - cond = cond.split('not ')[1] - left, op, right = split_condition(cond) - op = not_op(op) - not_cond = left + op + right - conditions.append(not_cond) - else: - conditions.append(cond) - else: - if 'and' not in condition and 'or' not in condition and 'not' in condition: - not_flag = True - without_non = condition.split('not')[1] - left, op, right = split_condition(without_non) - op = not_op(op) - condition_after_not = left + op + right - - - - - if self.is_locked(table_name): @@ -406,14 +379,7 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ bt = self._load_idx(index_name) table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) else: - if not_flag: - table = self.tables[table_name]._select_where(columns, condition_after_not, distinct, order_by, desc, limit) - elif and_flag: - table = self.tables[table_name]._select_where(columns, conditions[0], distinct, order_by, desc, limit) - for cond in conditions[1:]: - table = table._select_where(columns, cond, distinct, order_by, desc, limit) - else: - table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) + table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) # self.unlock_table(table_name) if save_as is not None: diff --git a/miniDB/table.py b/miniDB/table.py index d25bac0b..824b254e 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -244,21 +244,26 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # if not, return the rows with values where condition is met for value if condition is not None: list_of_indexes = [] - for cond in condition.split(' or '): - if 'not ' in cond: - cond = cond.split('not ')[1] - column_name, operator, value = self._parse_condition(cond) - column = self.column_by_name(column_name) - operator = not_op(operator) - cond = column_name + operator + str(value) - else: - column_name, operator, value = self._parse_condition(cond) - column = self.column_by_name(column_name) + if ' and ' not in condition: + for cond in condition.split(' or '): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + else: + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + + rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + for idx in rows: + list_of_indexes.append(idx) + rows = list(set(list_of_indexes)) + else: + print('') + - rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] - for idx in rows: - list_of_indexes.append(idx) - rows = list(set(list_of_indexes)) else: rows = [i for i in range(len(self.data))] From a159f3650c99a45b9b5bebdfc6ad46cb96cfe68b Mon Sep 17 00:00:00 2001 From: gk Date: Sat, 18 Feb 2023 23:11:01 +0200 Subject: [PATCH 08/16] AND , OR , NOT implemented in table.py - done --- miniDB/database.py | 8 ++++---- miniDB/misc.py | 1 + miniDB/table.py | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index 3cab8cce..aa85bf87 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -320,10 +320,10 @@ def delete_from(self, table_name, condition): ''' self.load_database() - conditions = [] - if 'and' in condition: - for cond in condition.split('and'): - conditions.append(cond) + # conditions = [] + # if 'and' in condition: + # for cond in condition.split('and'): + # conditions.append(cond) diff --git a/miniDB/misc.py b/miniDB/misc.py index fe758adb..05618247 100644 --- a/miniDB/misc.py +++ b/miniDB/misc.py @@ -68,3 +68,4 @@ def not_op(op): + diff --git a/miniDB/table.py b/miniDB/table.py index 824b254e..0fbc1494 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -261,9 +261,27 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by list_of_indexes.append(idx) rows = list(set(list_of_indexes)) else: - print('') - - + # print('') + lists_of_indexes = [] + for cond in condition.split('and'): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + indexes = [] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + indexes.append(index) + lists_of_indexes.append(indexes) + + intersection_set = set(lists_of_indexes[0]) + for l in lists_of_indexes[1:]: + intersection_set = intersection_set.intersection(l) + rows = list(intersection_set) else: rows = [i for i in range(len(self.data))] From a680152678f43b249938e8862a75b5d19578175d Mon Sep 17 00:00:00 2001 From: kosbourdos Date: Sat, 18 Feb 2023 23:13:01 +0200 Subject: [PATCH 09/16] AND OR and NOT implemented in delete operation --- miniDB/table.py | 55 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/miniDB/table.py b/miniDB/table.py index 0fbc1494..cf811d47 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -182,23 +182,49 @@ def _delete_where(self, condition): Operatores supported: (<,<=,==,>=,>) ''' - lists_of_indexes = [] - for cond in condition.split('and'): - indexes = [] - column_name, operator, value = self._parse_condition(cond) - column = self.column_by_name(column_name) - for index, row_value in enumerate(column): - if get_op(operator, row_value, value): - indexes.append(index) + if ' and ' in condition: + lists_of_indexes = [] + for cond in condition.split('and'): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + indexes = [] + column_name, operator, value = self._parse_condition(cond) + + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + indexes.append(index) + + lists_of_indexes.append(indexes) + + intersection_set = set(lists_of_indexes[0]) + for l in lists_of_indexes[1:]: + intersection_set = intersection_set.intersection(l) + + indexes_to_del = list(intersection_set) + else: + list_of_indexes = [] + for cond in condition.split(' or '): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + column_name, operator, value = self._parse_condition(cond) - lists_of_indexes.append(indexes) + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + list_of_indexes.append(index) - intersection_set = set(lists_of_indexes[0]) - for l in lists_of_indexes[1:]: - intersection_set = intersection_set.intersection(l) + indexes_to_del = list(set(list_of_indexes)) - indexes_to_del = list(intersection_set) # we pop from highest to lowest index in order to avoid removing the wrong item @@ -251,7 +277,6 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by column_name, operator, value = self._parse_condition(cond) column = self.column_by_name(column_name) operator = not_op(operator) - cond = column_name + operator + str(value) else: column_name, operator, value = self._parse_condition(cond) column = self.column_by_name(column_name) @@ -291,7 +316,7 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # we need to set the new column names/types and no of columns, since we might # only return some columns dict['column_names'] = [self.column_names[i] for i in return_cols] - dict['column_types'] = [self.column_types[i] for i in return_cols] + dict['column_types'] = [self.column_types[i] for i in return_cols] s_table = Table(load=dict) From 28724450eed431ac44e2271c8b95ea6fae6bc782 Mon Sep 17 00:00:00 2001 From: gk Date: Sat, 18 Feb 2023 23:41:14 +0200 Subject: [PATCH 10/16] Support for complex where statements in UPDATE TABLE --- miniDB/table.py | 67 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/miniDB/table.py b/miniDB/table.py index cf811d47..d3cfa794 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -150,22 +150,67 @@ def _update_rows(self, set_value, set_column, condition): Operatores supported: (<,<=,=,>=,>) ''' - # parse the condition - column_name, operator, value = self._parse_condition(condition) + # # parse the condition + # column_name, operator, value = self._parse_condition(condition) - # get the condition and the set column - column = self.column_by_name(column_name) + # # get the condition and the set column + # column = self.column_by_name(column_name) + # set_column_idx = self.column_names.index(set_column) + + # # set_columns_indx = [self.column_names.index(set_column_name) for set_column_name in set_column_names] + + # # for each value in column, if condition, replace it with set_value + # for row_ind, column_value in enumerate(column): + # if get_op(operator, column_value, value): + # self.data[row_ind][set_column_idx] = set_value + + # # self._update() + # # print(f"Updated {len(indexes_to_del)} rows") set_column_idx = self.column_names.index(set_column) + if ' and ' in condition: + lists_of_indexes = [] + for cond in condition.split('and'): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + indexes = [] + column_name, operator, value = self._parse_condition(cond) - # set_columns_indx = [self.column_names.index(set_column_name) for set_column_name in set_column_names] + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + indexes.append(index) - # for each value in column, if condition, replace it with set_value - for row_ind, column_value in enumerate(column): - if get_op(operator, column_value, value): - self.data[row_ind][set_column_idx] = set_value + lists_of_indexes.append(indexes) - # self._update() - # print(f"Updated {len(indexes_to_del)} rows") + intersection_set = set(lists_of_indexes[0]) + for l in lists_of_indexes[1:]: + intersection_set = intersection_set.intersection(l) + + indexes_to_del = list(intersection_set) + else: + list_of_indexes = [] + for cond in condition.split(' or '): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + column_name, operator, value = self._parse_condition(cond) + + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + list_of_indexes.append(index) + + indexes_to_del = list(set(list_of_indexes)) + + for row_index in indexes_to_del: + self.data[row_index][set_column_idx] = set_value def _delete_where(self, condition): From 98e675aab99ab269a3a0740737cbfe3985c425b0 Mon Sep 17 00:00:00 2001 From: kosbourdos Date: Sun, 19 Feb 2023 19:57:17 +0200 Subject: [PATCH 11/16] BETWEEN implemented and checked on 'SELECT', 'UPDATE' and 'DELETE' --- miniDB/database.py | 1 - miniDB/table.py | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/miniDB/database.py b/miniDB/database.py index aa85bf87..da3d9b2b 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -359,7 +359,6 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ distinct: boolean. If True, the resulting table will contain only unique rows. ''' - # print(table_name) self.load_database() if isinstance(table_name, Table): return table_name._select_where(columns, condition, distinct, order_by, desc, limit) diff --git a/miniDB/table.py b/miniDB/table.py index d3cfa794..69b654d9 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -166,6 +166,14 @@ def _update_rows(self, set_value, set_column, condition): # # self._update() # # print(f"Updated {len(indexes_to_del)} rows") + + if condition is not None: + if len(condition.split(' ')) == 5 and 'between' in condition and 'and' in condition: + condition = condition.split(' ')[0] + ' >= ' + \ + condition.split(' ')[2] + ' and ' + \ + condition.split(' ')[0] + '<=' + \ + condition.split(' ')[4] + set_column_idx = self.column_names.index(set_column) if ' and ' in condition: lists_of_indexes = [] @@ -227,6 +235,12 @@ def _delete_where(self, condition): Operatores supported: (<,<=,==,>=,>) ''' + if condition is not None: + if len(condition.split(' ')) == 5 and 'between' in condition and 'and' in condition: + condition = condition.split(' ')[0] + ' >= ' + \ + condition.split(' ')[2] + ' and ' + \ + condition.split(' ')[0] + '<=' + \ + condition.split(' ')[4] if ' and ' in condition: lists_of_indexes = [] @@ -314,6 +328,11 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # if condition is None, return all rows # if not, return the rows with values where condition is met for value if condition is not None: + if len(condition.split(' ')) == 5 and 'between' in condition and 'and' in condition: + condition = condition.split(' ')[0] + ' >= ' + \ + condition.split(' ')[2] + ' and ' + \ + condition.split(' ')[0] + '<=' + \ + condition.split(' ')[4] list_of_indexes = [] if ' and ' not in condition: for cond in condition.split(' or '): From 98941b4e09cdd4c5cbf28bd67102702c001ef4be Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 20 Feb 2023 18:23:09 +0200 Subject: [PATCH 12/16] Working on Unique columns implementation --- mdb.py | 12 +++++++++ miniDB/database.py | 12 ++++++--- miniDB/table.py | 64 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/mdb.py b/mdb.py index ba6b0aa9..d166235d 100644 --- a/mdb.py +++ b/mdb.py @@ -103,6 +103,17 @@ def create_query_plan(query, keywords, action): dic['primary key'] = arglist[arglist.index('primary')-2] else: dic['primary key'] = None + + # parse unique arg + arg_no_unique = args.replace('unique', '')[1:-1] + arglist = [val.strip().split(' ') for val in arg_no_unique.split(',')] + dic['column_names'] = ','.join([val[0] for val in arglist]) + dic['column_types'] = ','.join([val[1] for val in arglist]) + if 'unique' in args: + arglist = args[1:-1].split(' ') + dic['unique'] = arglist[arglist.index('unique')-2] + else: + dic['unique'] = None if action=='import': dic = {'import table' if key=='import' else key: val for key, val in dic.items()} @@ -119,6 +130,7 @@ def create_query_plan(query, keywords, action): else: dic['force'] = False + print("create query plan - dic:",dic) return dic diff --git a/miniDB/database.py b/miniDB/database.py index da3d9b2b..f878358c 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -86,6 +86,7 @@ def load_database(self): continue f = open(path+'/'+file, 'rb') tmp_dict = pickle.load(f) + print("Load - tmp_dict: ",tmp_dic) f.close() name = f'{file.split(".")[0]}' self.tables.update({name: tmp_dict}) @@ -101,7 +102,7 @@ def _update(self): self._update_meta_insert_stack() - def create_table(self, name, column_names, column_types, primary_key=None, load=None): + def create_table(self, name, column_names, column_types, primary_key=None, unique=None, load=None): ''' This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name @@ -113,7 +114,7 @@ def create_table(self, name, column_names, column_types, primary_key=None, load= load: boolean. Defines table object parameters as the name of the table and the column names. ''' # print('here -> ', column_names.split(',')) - self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)}) + self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, unique=unique, load=load)}) # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load) # check that new dynamic var doesnt exist already # self.no_of_tables += 1 @@ -270,7 +271,7 @@ def insert_into(self, table_name, row_str): lock_ownership = self.lock_table(table_name, mode='x') insert_stack = self._get_insert_stack_for_table(table_name) try: - self.tables[table_name]._insert(row, insert_stack) + self.tables[table_name]._insert(self,row, insert_stack) except Exception as e: logging.info(e) logging.info('ABORTED') @@ -373,7 +374,9 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ if self.is_locked(table_name): return - if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]: + if self._has_index(table_name) and (condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx] or \ + condition_column in self.unique_column_names): + #condition_column in (self.tables[table_name].column_names[self.tables[table_name].unique] for unique in self.unique_columns)): index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0] bt = self._load_idx(index_name) table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) @@ -692,6 +695,7 @@ def _construct_index(self, table_name, index_name): bt = Btree(3) # 3 is arbitrary # for each record in the primary key of the table, insert its value and index to the btree + # loop this process for each pk OR unique column - as user wish for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): if key is None: continue diff --git a/miniDB/table.py b/miniDB/table.py index 69b654d9..9e4b1fe5 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -26,12 +26,16 @@ class Table: - a dictionary that includes the appropriate info (all the attributes in __init__) ''' - def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None): + def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, unique=None, load=None): if load is not None: + print("******Updating table.") # if load is a dict, replace the object dict with it (replaces the object with the specified one) if isinstance(load, dict): + print("__init --> load: ",load) + print("__init --> dict: ",dict) self.__dict__.update(load) + print("__init --> updated __dict__: ",self.__dict__) # self._update() # if load is str, load from a file elif isinstance(load, str): @@ -39,7 +43,7 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= # if name, columns_names and column types are not none elif (name is not None) and (column_names is not None) and (column_types is not None): - + print("__init__ --> __dict__ : ",self.__dict__) self._name = name if len(column_names)!=len(column_types): @@ -61,6 +65,40 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= self.column_types = [eval(ct) if not isinstance(ct, type) else ct for ct in column_types] self.data = [] # data is a list of lists, a list of rows that is. + print("Column types: ",self.column_types) + print("Column names: ",self.column_names) + print("PK: ",primary_key) + print("Unique: ",unique) + + self.unique_columns = [] ## ---> A list of unique column indexes + self.unique_column_names = [unique] ## ---> A list of unique column names + + if unique is not None: + self.unique_columns.append(column_names.index(unique)) + self.unique_column_names.append(unique) + else: + self.unique_columns = None + self.unique_column_names = None + + # Set unique attributes + # setattr(self,unique_columns,self.unique_columns) + # setattr(self,unique_column_names,self.unique_column_names) + + print("INit table - unique_columns: ",self.unique_columns) + print("Init table - unique names: ",self.unique_column_names) + + # if unique column is set keep its index as an attribute + # for type_idx,col_type in enumerate(self.column_types): + # print("type_idx,col_type:",type_idx,col_type) + # if 'unique' in col_type: + # print("Table Init. Found unique kw") + # self.unique_idx = type_idx + # self.unique_columns.append(type_idx) + # self.unique_column_names.append(self.column_names[type_idx]) + # else: + # # Operate as a flag + # self.unique_idx = None + # if primary key is set, keep its index as an attribute if primary_key is not None: self.pk_idx = self.column_names.index(primary_key) @@ -69,6 +107,7 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= self.pk = primary_key # self._update() + # if any of the name, columns_names and column types are none. return an empty table object @@ -80,9 +119,11 @@ def _update(self): ''' Update all the available columns with the appended rows. ''' + print("******Updating table.") self.columns = [[row[i] for row in self.data] for i in range(len(self.column_names))] for ind, col in enumerate(self.column_names): setattr(self, col, self.columns[ind]) + print("Update",self.unique_columns) def _cast_column(self, column_name, cast_type): ''' @@ -130,6 +171,15 @@ def _insert(self, row, insert_stack=[]): elif i==self.pk_idx and row[i] is None: raise ValueError(f'ERROR -> The value of the primary key cannot be None.') + #if value is to be appended to a unique column, check that it doesnt already exist + print("Insert: ",self,row,insert_stack) + print(self.pk) + if self.unique_columns is not None: + if i in self.unique_columns: + for unique in self.unique_columns: + if row[i] in self.column_names[unique]: + raise ValueError(f'## ERROR -> Value {row[i]} already exists in unique column {self.column_names[unique]}.') + # if insert_stack is not empty, append to its last index if insert_stack != []: self.data[insert_stack[-1]] = row @@ -417,8 +467,8 @@ def _select_where_with_btree(self, return_columns, bt, condition, distinct=False column_name, operator, value = self._parse_condition(condition) # if the column in condition is not a primary key, abort the select - if column_name != self.column_names[self.pk_idx]: - print('Column is not PK. Aborting') + if column_name != self.column_names[self.pk_idx] or column_name not in (self.column_names[unique] for unique in self.unique_columns): + print('Column is not PK neither Unique. Aborting') # here we run the same select twice, sequentially and using the btree. # we then check the results match and compare performance (number of operation) @@ -668,6 +718,12 @@ def show(self, no_of_rows=None, is_locked=False): if self.pk_idx is not None: # table has a primary key, add PK next to the appropriate column headers[self.pk_idx] = headers[self.pk_idx]+' #PK#' + + if self.unique_idx is not None: + #table has unique columns + for unique in self.unique_columns: + headers[unique] = headers[unique]+' #Unique#' + # detect the rows that are no tfull of nones (these rows have been deleted) # if we dont skip these rows, the returning table has empty rows at the deleted positions non_none_rows = [row for row in self.data if any(row)] From f24f9f201caca78cf7a966b00f89434fe118343b Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 20 Feb 2023 20:25:03 +0200 Subject: [PATCH 13/16] Unique Implementation --- miniDB/database.py | 4 +++- miniDB/table.py | 48 ++++++++++++---------------------------------- 2 files changed, 15 insertions(+), 37 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index f878358c..b02bcde2 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -86,7 +86,7 @@ def load_database(self): continue f = open(path+'/'+file, 'rb') tmp_dict = pickle.load(f) - print("Load - tmp_dict: ",tmp_dic) + # print("Load - tmp_dict: ",tmp_dict) f.close() name = f'{file.split(".")[0]}' self.tables.update({name: tmp_dict}) @@ -111,6 +111,7 @@ def create_table(self, name, column_names, column_types, primary_key=None, uniqu column_names: list. Names of columns. column_types: list. Types of columns. primary_key: string. The primary key (if it exists). + unique: string load: boolean. Defines table object parameters as the name of the table and the column names. ''' # print('here -> ', column_names.split(',')) @@ -119,6 +120,7 @@ def create_table(self, name, column_names, column_types, primary_key=None, uniqu # check that new dynamic var doesnt exist already # self.no_of_tables += 1 self._update() + self.save_database() # (self.tables[name]) if self.verbose: diff --git a/miniDB/table.py b/miniDB/table.py index 9e4b1fe5..1753b761 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -29,21 +29,16 @@ class Table: def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, unique=None, load=None): if load is not None: - print("******Updating table.") # if load is a dict, replace the object dict with it (replaces the object with the specified one) if isinstance(load, dict): - print("__init --> load: ",load) - print("__init --> dict: ",dict) self.__dict__.update(load) - print("__init --> updated __dict__: ",self.__dict__) - # self._update() + self._update() # if load is str, load from a file elif isinstance(load, str): self._load_from_file(load) # if name, columns_names and column types are not none elif (name is not None) and (column_names is not None) and (column_types is not None): - print("__init__ --> __dict__ : ",self.__dict__) self._name = name if len(column_names)!=len(column_types): @@ -70,6 +65,14 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= print("PK: ",primary_key) print("Unique: ",unique) + # if primary key is set, keep its index as an attribute + if primary_key is not None: + self.pk_idx = self.column_names.index(primary_key) + else: + self.pk_idx = None + + self.pk = primary_key + # self._update() self.unique_columns = [] ## ---> A list of unique column indexes self.unique_column_names = [unique] ## ---> A list of unique column names @@ -84,30 +87,6 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= # setattr(self,unique_columns,self.unique_columns) # setattr(self,unique_column_names,self.unique_column_names) - print("INit table - unique_columns: ",self.unique_columns) - print("Init table - unique names: ",self.unique_column_names) - - # if unique column is set keep its index as an attribute - # for type_idx,col_type in enumerate(self.column_types): - # print("type_idx,col_type:",type_idx,col_type) - # if 'unique' in col_type: - # print("Table Init. Found unique kw") - # self.unique_idx = type_idx - # self.unique_columns.append(type_idx) - # self.unique_column_names.append(self.column_names[type_idx]) - # else: - # # Operate as a flag - # self.unique_idx = None - - # if primary key is set, keep its index as an attribute - if primary_key is not None: - self.pk_idx = self.column_names.index(primary_key) - else: - self.pk_idx = None - - self.pk = primary_key - # self._update() - # if any of the name, columns_names and column types are none. return an empty table object @@ -119,12 +98,10 @@ def _update(self): ''' Update all the available columns with the appended rows. ''' - print("******Updating table.") self.columns = [[row[i] for row in self.data] for i in range(len(self.column_names))] for ind, col in enumerate(self.column_names): setattr(self, col, self.columns[ind]) - print("Update",self.unique_columns) - + def _cast_column(self, column_name, cast_type): ''' Cast all values of a column using a specified type. @@ -172,8 +149,7 @@ def _insert(self, row, insert_stack=[]): raise ValueError(f'ERROR -> The value of the primary key cannot be None.') #if value is to be appended to a unique column, check that it doesnt already exist - print("Insert: ",self,row,insert_stack) - print(self.pk) + if self.unique_columns is not None: if i in self.unique_columns: for unique in self.unique_columns: @@ -719,7 +695,7 @@ def show(self, no_of_rows=None, is_locked=False): # table has a primary key, add PK next to the appropriate column headers[self.pk_idx] = headers[self.pk_idx]+' #PK#' - if self.unique_idx is not None: + if self.unique_columns is not None: #table has unique columns for unique in self.unique_columns: headers[unique] = headers[unique]+' #Unique#' From 9e5b9c69f04e2d123ffa2345352a1ce4962f8184 Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 20 Feb 2023 23:21:36 +0200 Subject: [PATCH 14/16] Added column name in meta_indexes and select condition --- mdb.py | 2 +- miniDB/database.py | 29 ++++++++++++++++++----------- miniDB/table.py | 20 +++++++++++--------- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/mdb.py b/mdb.py index d166235d..f783cc34 100644 --- a/mdb.py +++ b/mdb.py @@ -130,7 +130,7 @@ def create_query_plan(query, keywords, action): else: dic['force'] = False - print("create query plan - dic:",dic) + # print("create query plan - dic:",dic) return dic diff --git a/miniDB/database.py b/miniDB/database.py index b02bcde2..af537f52 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -54,7 +54,7 @@ def __init__(self, name, load=True, verbose = True): self.create_table('meta_length', 'table_name,no_of_rows', 'str,int') self.create_table('meta_locks', 'table_name,pid,mode', 'str,int,str') self.create_table('meta_insert_stack', 'table_name,indexes', 'str,list') - self.create_table('meta_indexes', 'table_name,index_name', 'str,str') + self.create_table('meta_indexes', 'table_name,index_name,column_name', 'str,str,str') self.save_database() def save_database(self): @@ -115,7 +115,10 @@ def create_table(self, name, column_names, column_types, primary_key=None, uniqu load: boolean. Defines table object parameters as the name of the table and the column names. ''' # print('here -> ', column_names.split(',')) - self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, unique=unique, load=load)}) + + self.tables.update({name: Table(name=name, column_names=column_names.split(','), + column_types=column_types.split(','), primary_key=primary_key, unique=unique, load=load)}) + # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load) # check that new dynamic var doesnt exist already # self.no_of_tables += 1 @@ -376,9 +379,10 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ if self.is_locked(table_name): return - if self._has_index(table_name) and (condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx] or \ - condition_column in self.unique_column_names): - #condition_column in (self.tables[table_name].column_names[self.tables[table_name].unique] for unique in self.unique_columns)): + # if (condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx] or \ + # condition_column in self.tables[table_name].unique_column_names): + # #condition_column in (self.tables[table_name].column_names[self.tables[table_name].unique] for unique in self.unique_columns)): + if (condition_column in self.tables['meta_indexes'].column_names): index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0] bt = self._load_idx(index_name) table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) @@ -663,7 +667,7 @@ def _update_meta_insert_stack_for_tb(self, table_name, new_stack): # indexes - def create_index(self, index_name, table_name, index_type='btree'): + def create_index(self, index_name, table_name, column_name, index_type='btree'): ''' Creates an index on a specified table with a given name. Important: An index can only be created on a primary key (the user does not specify the column). @@ -673,13 +677,14 @@ def create_index(self, index_name, table_name, index_type='btree'): index_name: string. Name of the created index. ''' if self.tables[table_name].pk_idx is None: # if no primary key, no index - raise Exception('Cannot create index. Table has no primary key.') + if self.tables[table_name].unique_columns is None: + raise Exception('Cannot create index. Table has no primary key.') if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): # currently only btree is supported. This can be changed by adding another if. if index_type=='btree': logging.info('Creating Btree index.') # insert a record with the name of the index and the table on which it's created to the meta_indexes table - self.tables['meta_indexes']._insert([table_name, index_name]) + self.tables['meta_indexes']._insert([table_name, index_name, column_name]) # crate the actual index self._construct_index(table_name, index_name) self.save_database() @@ -700,15 +705,17 @@ def _construct_index(self, table_name, index_name): # loop this process for each pk OR unique column - as user wish for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): if key is None: - continue - bt.insert(key, idx) + for uidx, ukey in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].unique) for unique in self.tables[table_name].unique_columns): + if ukey is None: + continue + bt.insert(key, idx) # save the btree self._save_index(index_name, bt) def _has_index(self, table_name): ''' - Check whether the specified table's primary key column is indexed. + Check whether the specified table is indexed. Args: table_name: string. Table name (must be part of database). diff --git a/miniDB/table.py b/miniDB/table.py index 1753b761..1380b8d5 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -32,7 +32,7 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= # if load is a dict, replace the object dict with it (replaces the object with the specified one) if isinstance(load, dict): self.__dict__.update(load) - self._update() + # self._update() # if load is str, load from a file elif isinstance(load, str): self._load_from_file(load) @@ -60,10 +60,10 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= self.column_types = [eval(ct) if not isinstance(ct, type) else ct for ct in column_types] self.data = [] # data is a list of lists, a list of rows that is. - print("Column types: ",self.column_types) - print("Column names: ",self.column_names) - print("PK: ",primary_key) - print("Unique: ",unique) + # print("Column types: ",self.column_types) + # print("Column names: ",self.column_names) + # print("PK: ",primary_key) + # print("Unique: ",unique) # if primary key is set, keep its index as an attribute if primary_key is not None: @@ -162,6 +162,7 @@ def _insert(self, row, insert_stack=[]): else: # else append to the end self.data.append(row) # self._update() + def _update_rows(self, set_value, set_column, condition): ''' @@ -411,7 +412,7 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by s_table = Table(load=dict) s_table.data = list(set(map(lambda x: tuple(x), s_table.data))) if distinct else s_table.data - + if order_by: s_table.order_by(order_by, desc) @@ -442,8 +443,9 @@ def _select_where_with_btree(self, return_columns, bt, condition, distinct=False column_name, operator, value = self._parse_condition(condition) - # if the column in condition is not a primary key, abort the select - if column_name != self.column_names[self.pk_idx] or column_name not in (self.column_names[unique] for unique in self.unique_columns): + # if the column in condition is not a primary key or unique abort the select + if column_name != self.column_names[self.pk_idx] or\ + column_name not in (self.column_names[unique] for unique in self.unique_columns): print('Column is not PK neither Unique. Aborting') # here we run the same select twice, sequentially and using the btree. @@ -698,7 +700,7 @@ def show(self, no_of_rows=None, is_locked=False): if self.unique_columns is not None: #table has unique columns for unique in self.unique_columns: - headers[unique] = headers[unique]+' #Unique#' + headers[unique] = headers[unique]+' #Unq#' # detect the rows that are no tfull of nones (these rows have been deleted) # if we dont skip these rows, the returning table has empty rows at the deleted positions From 2d8f52287cf6b21e331faab54c1a664a5aafbd1f Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 20 Feb 2023 23:40:59 +0200 Subject: [PATCH 15/16] minor changes in create-index --- miniDB/database.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index af537f52..a2e3850b 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -115,10 +115,8 @@ def create_table(self, name, column_names, column_types, primary_key=None, uniqu load: boolean. Defines table object parameters as the name of the table and the column names. ''' # print('here -> ', column_names.split(',')) - self.tables.update({name: Table(name=name, column_names=column_names.split(','), - column_types=column_types.split(','), primary_key=primary_key, unique=unique, load=load)}) - + column_types=column_types.split(','), primary_key=primary_key, unique=unique, load=load)}) # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load) # check that new dynamic var doesnt exist already # self.no_of_tables += 1 @@ -382,7 +380,7 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ # if (condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx] or \ # condition_column in self.tables[table_name].unique_column_names): # #condition_column in (self.tables[table_name].column_names[self.tables[table_name].unique] for unique in self.unique_columns)): - if (condition_column in self.tables['meta_indexes'].column_names): + if (condition_column in self.tables['meta_indexes'].column_name): index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0] bt = self._load_idx(index_name) table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) @@ -677,8 +675,10 @@ def create_index(self, index_name, table_name, column_name, index_type='btree'): index_name: string. Name of the created index. ''' if self.tables[table_name].pk_idx is None: # if no primary key, no index - if self.tables[table_name].unique_columns is None: - raise Exception('Cannot create index. Table has no primary key.') + raise Exception('Cannot create index. Table has no primary key.') + elif self.tables[table_name].unique_columns is None: + raise Exception('Cannot create index. Table has no primary key.') + if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): # currently only btree is supported. This can be changed by adding another if. if index_type=='btree': From f7bf2c04bc3bac5331d5606b4670ad0a2862c07a Mon Sep 17 00:00:00 2001 From: gk Date: Mon, 20 Feb 2023 23:54:22 +0200 Subject: [PATCH 16/16] major debuging - index completed --- miniDB/database.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index a2e3850b..062d67e5 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -274,7 +274,7 @@ def insert_into(self, table_name, row_str): lock_ownership = self.lock_table(table_name, mode='x') insert_stack = self._get_insert_stack_for_table(table_name) try: - self.tables[table_name]._insert(self,row, insert_stack) + self.tables[table_name]._insert(row, insert_stack) except Exception as e: logging.info(e) logging.info('ABORTED') @@ -674,10 +674,9 @@ def create_index(self, index_name, table_name, column_name, index_type='btree'): table_name: string. Table name (must be part of database). index_name: string. Name of the created index. ''' - if self.tables[table_name].pk_idx is None: # if no primary key, no index - raise Exception('Cannot create index. Table has no primary key.') - elif self.tables[table_name].unique_columns is None: - raise Exception('Cannot create index. Table has no primary key.') + if self.tables[table_name].pk_idx is None and self.tables[table_name].unique_columns is None: # if no primary key, no index + raise Exception('Cannot create index. Table has no primary key nor unique.') + if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): # currently only btree is supported. This can be changed by adding another if.