diff --git a/mdb.py b/mdb.py index a981e5be..f783cc34 100644 --- a/mdb.py +++ b/mdb.py @@ -44,9 +44,7 @@ def create_query_plan(query, keywords, action): This can and will be used recursively ''' - dic = {val: None for val in keywords if val!=';'} - ql = [val for val in query.split(' ') if val !=''] kw_in_query = [] @@ -105,6 +103,17 @@ def create_query_plan(query, keywords, action): dic['primary key'] = arglist[arglist.index('primary')-2] else: dic['primary key'] = None + + # parse unique arg + arg_no_unique = args.replace('unique', '')[1:-1] + arglist = [val.strip().split(' ') for val in arg_no_unique.split(',')] + dic['column_names'] = ','.join([val[0] for val in arglist]) + dic['column_types'] = ','.join([val[1] for val in arglist]) + if 'unique' in args: + arglist = args[1:-1].split(' ') + dic['unique'] = arglist[arglist.index('unique')-2] + else: + dic['unique'] = None if action=='import': dic = {'import table' if key=='import' else key: val for key, val in dic.items()} @@ -121,6 +130,7 @@ def create_query_plan(query, keywords, action): else: dic['force'] = False + # print("create query plan - dic:",dic) return dic diff --git a/miniDB/database.py b/miniDB/database.py index a3ac6be7..062d67e5 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -13,7 +13,7 @@ from joins import Inlj, Smj from btree import Btree -from misc import split_condition +from misc import split_condition, not_op from table import Table @@ -54,7 +54,7 @@ def __init__(self, name, load=True, verbose = True): self.create_table('meta_length', 'table_name,no_of_rows', 'str,int') self.create_table('meta_locks', 'table_name,pid,mode', 'str,int,str') self.create_table('meta_insert_stack', 'table_name,indexes', 'str,list') - self.create_table('meta_indexes', 'table_name,index_name', 'str,str') + self.create_table('meta_indexes', 'table_name,index_name,column_name', 'str,str,str') self.save_database() def save_database(self): @@ -86,6 +86,7 @@ def load_database(self): continue f = open(path+'/'+file, 'rb') tmp_dict = pickle.load(f) + # print("Load - tmp_dict: ",tmp_dict) f.close() name = f'{file.split(".")[0]}' self.tables.update({name: tmp_dict}) @@ -101,7 +102,7 @@ def _update(self): self._update_meta_insert_stack() - def create_table(self, name, column_names, column_types, primary_key=None, load=None): + def create_table(self, name, column_names, column_types, primary_key=None, unique=None, load=None): ''' This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name @@ -110,14 +111,17 @@ def create_table(self, name, column_names, column_types, primary_key=None, load= column_names: list. Names of columns. column_types: list. Types of columns. primary_key: string. The primary key (if it exists). + unique: string. The name of the unique column (if it exists). load: boolean. Defines table object parameters as the name of the table and the column names. 
''' # print('here -> ', column_names.split(',')) - self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)}) + self.tables.update({name: Table(name=name, column_names=column_names.split(','), + column_types=column_types.split(','), primary_key=primary_key, unique=unique, load=load)}) # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load) # check that new dynamic var doesnt exist already # self.no_of_tables += 1 self._update() + self.save_database() # (self.tables[name]) if self.verbose: @@ -319,7 +323,14 @@ def delete_from(self, table_name, condition): Operatores supported: (<,<=,==,>=,>) ''' self.load_database() - + + # conditions = [] + # if 'and' in condition: + # for cond in condition.split('and'): + # conditions.append(cond) + + + lock_ownership = self.lock_table(table_name, mode='x') deleted = self.tables[table_name]._delete_where(condition) if lock_ownership: @@ -352,26 +363,30 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ distinct: boolean. If True, the resulting table will contain only unique rows. ''' - # print(table_name) self.load_database() - if isinstance(table_name,Table): + if isinstance(table_name, Table): return table_name._select_where(columns, condition, distinct, order_by, desc, limit) if condition is not None: - condition_column = split_condition(condition)[0] + if 'and' not in condition and 'or' not in condition: + condition_column = split_condition(condition)[0] + else: + # compound (and/or) conditions are evaluated with a sequential scan, so no index column is needed + condition_column = '' else: condition_column = '' - - # self.lock_table(table_name, mode='x') + + if self.is_locked(table_name): return - if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]: + # if (condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx] or \ + # condition_column in self.tables[table_name].unique_column_names): + # #condition_column in (self.tables[table_name].column_names[self.tables[table_name].unique] for unique in self.unique_columns)): + if (condition_column in self.tables['meta_indexes'].column_name): index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0] bt = self._load_idx(index_name) table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) else: table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) + # self.unlock_table(table_name) if save_as is not None: table._name = save_as @@ -650,7 +665,7 @@ def _update_meta_insert_stack_for_tb(self, table_name, new_stack): # indexes - def create_index(self, index_name, table_name, index_type='btree'): + def create_index(self, index_name, table_name, column_name, index_type='btree'): ''' Creates an index on a specified table with a given name. Important: An index can only be created on a primary key (the user does not specify the column). @@ -659,14 +674,16 @@ def create_index(self, index_name, table_name, index_type='btree'): table_name: string. Table name (must be part of database). index_name: string. Name of the created index. ''' - if self.tables[table_name].pk_idx is None: # if no primary key, no index - raise Exception('Cannot create index. 
Table has no primary key.') + if self.tables[table_name].pk_idx is None and self.tables[table_name].unique_columns is None: # if no primary key, no index + raise Exception('Cannot create index. Table has neither a primary key nor a unique column.') + + if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): # currently only btree is supported. This can be changed by adding another if. if index_type=='btree': logging.info('Creating Btree index.') # insert a record with the name of the index and the table on which it's created to the meta_indexes table - self.tables['meta_indexes']._insert([table_name, index_name]) + self.tables['meta_indexes']._insert([table_name, index_name, column_name]) # crate the actual index self._construct_index(table_name, index_name) self.save_database() @@ -684,17 +701,20 @@ def _construct_index(self, table_name, index_name): bt = Btree(3) # 3 is arbitrary # for each record in the primary key of the table, insert its value and index to the btree + # repeat the process for any unique columns, so an index can also serve a unique column for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): if key is None: continue bt.insert(key, idx) + + if self.tables[table_name].unique_column_names is not None: + for unique in self.tables[table_name].unique_column_names: + for uidx, ukey in enumerate(self.tables[table_name].column_by_name(unique)): + if ukey is None: + continue + bt.insert(ukey, uidx) # save the btree self._save_index(index_name, bt) def _has_index(self, table_name): ''' - Check whether the specified table's primary key column is indexed. + Check whether the specified table is indexed. Args: table_name: string. Table name (must be part of database). diff --git a/miniDB/misc.py b/miniDB/misc.py index aefada74..05618247 100644 --- a/miniDB/misc.py +++ b/miniDB/misc.py @@ -1,30 +1,34 @@ import operator + def get_op(op, a, b): ''' Get op as a function of a and b by using a symbol ''' ops = {'>': operator.gt, - '<': operator.lt, - '>=': operator.ge, - '<=': operator.le, - '=': operator.eq} + '<': operator.lt, + '>=': operator.ge, + '<=': operator.le, + '=': operator.eq, + '<>': operator.ne} try: - return ops[op](a,b) + return ops[op](a, b) except TypeError: # if a or b is None (deleted record), python3 raises typerror return False + def split_condition(condition): ops = {'>=': operator.ge, '<=': operator.le, + '<>': operator.ne, '=': operator.eq, '>': operator.gt, '<': operator.lt} for op_key in ops.keys(): - splt=condition.split(op_key) - if len(splt)>1: + splt = condition.split(op_key) + if len(splt) > 1: left, right = splt[0].strip(), splt[1].strip() if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them. 
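Note on the misc.py hunk above: get_op and split_condition now also accept the SQL-style not-equal operator '<>', mapped to operator.ne. The dictionary order matters: the multi-character operators '>=', '<=', '<>' must be tried before the single-character '>' and '<', otherwise a condition such as salary <> 3000 would be split at the wrong position. A minimal standalone sketch of that lookup (split_condition_sketch and OPS are illustrative names, not part of miniDB):

import operator

# Multi-character operators are listed before '>' and '<', mirroring the
# dictionary order in the split_condition hunk above.
OPS = {'>=': operator.ge, '<=': operator.le, '<>': operator.ne,
       '=': operator.eq, '>': operator.gt, '<': operator.lt}

def split_condition_sketch(condition):
    for op_key in OPS:                      # insertion order is preserved (Python 3.7+)
        left, found, right = condition.partition(op_key)
        if found:
            return left.strip(), op_key, right.strip()

print(split_condition_sketch('salary <> 3000'))   # ('salary', '<>', '3000')
print(split_condition_sketch('id >= 5'))          # ('id', '>=', '5')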
@@ -48,3 +52,20 @@ def reverse_op(op): '<=' : '>=', '=' : '=' }.get(op) + + +def not_op(op): + """ + Return the negation of a comparison operator (used to rewrite 'NOT' conditions) + """ + return { + '>': '<=', + '<': '>=', + '>=': '<', + '<=': '>', + '=': '<>', + '<>': '='}.get(op) + + + + diff --git a/miniDB/table.py b/miniDB/table.py index f5c7d937..1380b8d5 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -6,7 +6,7 @@ sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB') -from misc import get_op, split_condition +from misc import get_op, split_condition, not_op class Table: @@ -26,7 +26,7 @@ class Table: - a dictionary that includes the appropriate info (all the attributes in __init__) ''' - def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None): + def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, unique=None, load=None): if load is not None: # if load is a dict, replace the object dict with it (replaces the object with the specified one) @@ -39,7 +39,6 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= # if name, columns_names and column types are not none elif (name is not None) and (column_names is not None) and (column_types is not None): - self._name = name if len(column_names)!=len(column_types): @@ -61,6 +60,11 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= self.column_types = [eval(ct) if not isinstance(ct, type) else ct for ct in column_types] self.data = [] # data is a list of lists, a list of rows that is. + # print("Column types: ",self.column_types) + # print("Column names: ",self.column_names) + # print("PK: ",primary_key) + # print("Unique: ",unique) + # if primary key is set, keep its index as an attribute if primary_key is not None: self.pk_idx = self.column_names.index(primary_key) @@ -69,6 +73,20 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= self.pk = primary_key # self._update() + self.unique_columns = [] ## ---> A list of unique column indexes + self.unique_column_names = [] ## ---> A list of unique column names + + if unique is not None: + self.unique_columns.append(column_names.index(unique)) + self.unique_column_names.append(unique) + else: + self.unique_columns = None + self.unique_column_names = None + + # Set unique attributes + # setattr(self,unique_columns,self.unique_columns) + # setattr(self,unique_column_names,self.unique_column_names) + + # if any of the name, columns_names and column types are none. return an empty table object @@ -83,7 +101,7 @@ def _update(self): self.columns = [[row[i] for row in self.data] for i in range(len(self.column_names))] for ind, col in enumerate(self.column_names): setattr(self, col, self.columns[ind]) - + def _cast_column(self, column_name, cast_type): ''' Cast all values of a column using a specified type. 
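Note on not_op above: the table.py hunks below use it to evaluate NOT conditions by stripping the leading 'not ' from a sub-condition and flipping its comparison operator before the usual get_op check. A small self-contained sketch of that idea, assuming the corrected negation table; matches, OPS and NEGATE are illustrative names only, not miniDB code.

import operator

OPS = {'>=': operator.ge, '<=': operator.le, '<>': operator.ne,
       '=': operator.eq, '>': operator.gt, '<': operator.lt}

# Negation table matching not_op above: NOT(a > b) is a <= b, and so on.
NEGATE = {'>': '<=', '<': '>=', '>=': '<', '<=': '>', '=': '<>', '<>': '='}

def matches(condition, row_value):
    # 'not age>=30' -> drop the leading 'not ', flip the operator, evaluate normally
    negated = condition.startswith('not ')
    if negated:
        condition = condition[len('not '):]
    for op_key in OPS:                       # multi-character operators are tried first
        left, found, right = condition.partition(op_key)
        if found:
            op = NEGATE[op_key] if negated else op_key
            return OPS[op](row_value, int(right.strip()))

print(matches('not age>=30', 25))   # True  (25 < 30)
print(matches('age>=30', 25))       # False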
@@ -130,12 +148,21 @@ def _insert(self, row, insert_stack=[]): elif i==self.pk_idx and row[i] is None: raise ValueError(f'ERROR -> The value of the primary key cannot be None.') + # if the value belongs to a unique column, check that it doesn't already exist in that column's data + + if self.unique_columns is not None and i in self.unique_columns: + if row[i] is not None and row[i] in self.column_by_name(self.column_names[i]): + raise ValueError(f'## ERROR -> Value {row[i]} already exists in unique column {self.column_names[i]}.') + # if insert_stack is not empty, append to its last index if insert_stack != []: self.data[insert_stack[-1]] = row else: # else append to the end self.data.append(row) # self._update() + def _update_rows(self, set_value, set_column, condition): ''' @@ -150,22 +177,75 @@ def _update_rows(self, set_value, set_column, condition): Operatores supported: (<,<=,=,>=,>) ''' - # parse the condition - column_name, operator, value = self._parse_condition(condition) + # # parse the condition + # column_name, operator, value = self._parse_condition(condition) - # get the condition and the set column - column = self.column_by_name(column_name) - set_column_idx = self.column_names.index(set_column) + # # get the condition and the set column + # column = self.column_by_name(column_name) + # set_column_idx = self.column_names.index(set_column) - # set_columns_indx = [self.column_names.index(set_column_name) for set_column_name in set_column_names] + # # set_columns_indx = [self.column_names.index(set_column_name) for set_column_name in set_column_names] - # for each value in column, if condition, replace it with set_value - for row_ind, column_value in enumerate(column): - if get_op(operator, column_value, value): - self.data[row_ind][set_column_idx] = set_value + # # for each value in column, if condition, replace it with set_value + # for row_ind, column_value in enumerate(column): + # if get_op(operator, column_value, value): + # self.data[row_ind][set_column_idx] = set_value - # self._update() - # print(f"Updated {len(indexes_to_del)} rows") + # # self._update() + # # print(f"Updated {len(indexes_to_del)} rows") + + if condition is not None: + if len(condition.split(' ')) == 5 and 'between' in condition and 'and' in condition: + condition = condition.split(' ')[0] + ' >= ' + \ + condition.split(' ')[2] + ' and ' + \ + condition.split(' ')[0] + '<=' + \ + condition.split(' ')[4] + + set_column_idx = self.column_names.index(set_column) + if ' and ' in condition: + lists_of_indexes = [] + for cond in condition.split('and'): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + indexes = [] + column_name, operator, value = self._parse_condition(cond) + + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + indexes.append(index) + + lists_of_indexes.append(indexes) + + intersection_set = set(lists_of_indexes[0]) + for l in lists_of_indexes[1:]: + intersection_set = intersection_set.intersection(l) + + indexes_to_del = list(intersection_set) + else: + list_of_indexes = [] + for cond in condition.split(' or '): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + 
column_name, operator, value = self._parse_condition(cond) + + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + list_of_indexes.append(index) + + indexes_to_del = list(set(list_of_indexes)) + + for row_index in indexes_to_del: + self.data[row_index][set_column_idx] = set_value def _delete_where(self, condition): @@ -182,14 +262,56 @@ def _delete_where(self, condition): Operatores supported: (<,<=,==,>=,>) ''' - column_name, operator, value = self._parse_condition(condition) + if condition is not None: + if len(condition.split(' ')) == 5 and 'between' in condition and 'and' in condition: + condition = condition.split(' ')[0] + ' >= ' + \ + condition.split(' ')[2] + ' and ' + \ + condition.split(' ')[0] + '<=' + \ + condition.split(' ')[4] + + if ' and ' in condition: + lists_of_indexes = [] + for cond in condition.split('and'): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + indexes = [] + column_name, operator, value = self._parse_condition(cond) + + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + indexes.append(index) + + lists_of_indexes.append(indexes) + + intersection_set = set(lists_of_indexes[0]) + for l in lists_of_indexes[1:]: + intersection_set = intersection_set.intersection(l) + + indexes_to_del = list(intersection_set) + else: + list_of_indexes = [] + for cond in condition.split(' or '): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + column_name, operator, value = self._parse_condition(cond) + + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + list_of_indexes.append(index) + + indexes_to_del = list(set(list_of_indexes)) - indexes_to_del = [] - column = self.column_by_name(column_name) - for index, row_value in enumerate(column): - if get_op(operator, row_value, value): - indexes_to_del.append(index) # we pop from highest to lowest index in order to avoid removing the wrong item # since we dont delete, we dont have to to pop in that order, but since delete is used @@ -233,9 +355,49 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # if condition is None, return all rows # if not, return the rows with values where condition is met for value if condition is not None: - column_name, operator, value = self._parse_condition(condition) - column = self.column_by_name(column_name) - rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + if len(condition.split(' ')) == 5 and 'between' in condition and 'and' in condition: + condition = condition.split(' ')[0] + ' >= ' + \ + condition.split(' ')[2] + ' and ' + \ + condition.split(' ')[0] + '<=' + \ + condition.split(' ')[4] + list_of_indexes = [] + if ' and ' not in condition: + for cond in condition.split(' or '): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + else: + column_name, operator, value = self._parse_condition(cond) + column = 
self.column_by_name(column_name) + + rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + for idx in rows: + list_of_indexes.append(idx) + rows = list(set(list_of_indexes)) + else: + # print('') + lists_of_indexes = [] + for cond in condition.split('and'): + if 'not ' in cond: + cond = cond.split('not ')[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + operator = not_op(operator) + cond = column_name + operator + str(value) + indexes = [] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + for index, row_value in enumerate(column): + if get_op(operator, row_value, value): + indexes.append(index) + lists_of_indexes.append(indexes) + + intersection_set = set(lists_of_indexes[0]) + for l in lists_of_indexes[1:]: + intersection_set = intersection_set.intersection(l) + rows = list(intersection_set) else: rows = [i for i in range(len(self.data))] @@ -245,12 +407,12 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # we need to set the new column names/types and no of columns, since we might # only return some columns dict['column_names'] = [self.column_names[i] for i in return_cols] - dict['column_types'] = [self.column_types[i] for i in return_cols] + dict['column_types'] = [self.column_types[i] for i in return_cols] s_table = Table(load=dict) s_table.data = list(set(map(lambda x: tuple(x), s_table.data))) if distinct else s_table.data - + if order_by: s_table.order_by(order_by, desc) @@ -281,9 +443,10 @@ def _select_where_with_btree(self, return_columns, bt, condition, distinct=False column_name, operator, value = self._parse_condition(condition) - # if the column in condition is not a primary key, abort the select - if column_name != self.column_names[self.pk_idx]: - print('Column is not PK. Aborting') + # if the column in condition is neither the primary key nor a unique column, abort the select + if column_name != self.column_names[self.pk_idx] and\ + (self.unique_columns is None or column_name not in (self.column_names[unique] for unique in self.unique_columns)): + print('Column is neither PK nor Unique. Aborting') # here we run the same select twice, sequentially and using the btree. # we then check the results match and compare performance (number of operation) @@ -533,6 +696,12 @@ def show(self, no_of_rows=None, is_locked=False): if self.pk_idx is not None: # table has a primary key, add PK next to the appropriate column headers[self.pk_idx] = headers[self.pk_idx]+' #PK#' + + if self.unique_columns is not None: + # table has unique columns + for unique in self.unique_columns: + headers[unique] = headers[unique]+' #Unq#' + # detect the rows that are no tfull of nones (these rows have been deleted) # if we dont skip these rows, the returning table has empty rows at the deleted positions non_none_rows = [row for row in self.data if any(row)]
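Note on the rewritten _select_where / _delete_where / _update_rows above: all three share the same compound-condition strategy. A 'between' clause is first rewritten into two comparisons, then each sub-condition is evaluated and the matching row indexes are collected as a set; 'and' intersects those sets, 'or' unions them, which keeps the combination order-independent. A standalone sketch of that approach (matching_row_indexes, rows_matching and the sample data are illustrative only, not part of miniDB; for brevity it evaluates everything against a single integer column, whereas the real code resolves each sub-condition's column via _parse_condition and column_by_name):

import operator

OPS = {'>=': operator.ge, '<=': operator.le, '<>': operator.ne,
       '=': operator.eq, '>': operator.gt, '<': operator.lt}

def rows_matching(column, cond):
    # return the set of row indexes in one column that satisfy a single sub-condition
    for op_key in OPS:
        left, found, right = cond.partition(op_key)
        if found:
            value = int(right.strip())
            return {i for i, x in enumerate(column) if OPS[op_key](x, value)}

def matching_row_indexes(column, condition):
    words = condition.split(' ')
    if len(words) == 5 and 'between' in condition and 'and' in condition:
        # 'id between 3 and 7'  ->  'id >= 3 and id <= 7'
        condition = f'{words[0]} >= {words[2]} and {words[0]} <= {words[4]}'
    if ' and ' in condition:
        return set.intersection(*[rows_matching(column, c) for c in condition.split(' and ')])
    return set.union(*[rows_matching(column, c) for c in condition.split(' or ')])

ids = [1, 3, 5, 7, 9]
print(sorted(matching_row_indexes(ids, 'id between 3 and 7')))   # [1, 2, 3]
print(sorted(matching_row_indexes(ids, 'id<3 or id>7')))         # [0, 4]

The set-based combination also means a row that satisfies several OR branches is counted once: the union collapses duplicates before the matching rows are materialised.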