diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 00000000..059a3df2 Binary files /dev/null and b/.DS_Store differ diff --git "a/documentation_\317\20019016_\317\20019110_\317\20019177.docx" "b/documentation_\317\20019016_\317\20019110_\317\20019177.docx" new file mode 100644 index 00000000..4c881854 Binary files /dev/null and "b/documentation_\317\20019016_\317\20019110_\317\20019177.docx" differ diff --git a/mdb.py b/mdb.py index a981e5be..0463649d 100644 --- a/mdb.py +++ b/mdb.py @@ -66,8 +66,6 @@ def create_query_plan(query, keywords, action): ql.pop(i+1) kw_positions.append(i) i+=1 - - for i in range(len(kw_in_query)-1): dic[kw_in_query[i]] = ' '.join(ql[kw_positions[i]+1:kw_positions[i+1]]) @@ -92,11 +90,11 @@ def create_query_plan(query, keywords, action): else: dic['desc'] = None - if action=='create table': args = dic['create table'][dic['create table'].index('('):dic['create table'].index(')')+1] dic['create table'] = dic['create table'].removesuffix(args).strip() - arg_nopk = args.replace('primary key', '')[1:-1] + arg_nopk = args.replace('primary key', '') + arg_nopk = arg_nopk.replace('unique', '')[1:-1] arglist = [val.strip().split(' ') for val in arg_nopk.split(',')] dic['column_names'] = ','.join([val[0] for val in arglist]) dic['column_types'] = ','.join([val[1] for val in arglist]) @@ -105,6 +103,14 @@ def create_query_plan(query, keywords, action): dic['primary key'] = arglist[arglist.index('primary')-2] else: dic['primary key'] = None + + if 'unique' in args: + arglist = args[1:-1].replace(',', '') + arglist = arglist.split(' ') + dic['unique'] = arglist[arglist.index('unique')-2] + else: + dic['unique'] = None + if action=='import': dic = {'import table' if key=='import' else key: val for key, val in dic.items()} @@ -121,6 +127,36 @@ def create_query_plan(query, keywords, action): else: dic['force'] = False + # add action == 'create index' + if action == 'create index': + + dic['on'] = dic[kw_in_query[1]] + if dic['on'] != "": + if 
'(' in dic[kw_in_query[1]] and ')' in dic[kw_in_query[1]]: + condition_split = dic['on'].split() + condition_split.remove("(") + condition_split.remove(")") + dic['on'] = condition_split[0] + dic['column'] = condition_split[1] + elif '(' in dic[kw_in_query[1]]: + raise ValueError('Must be: create index name_of_index on name_table(name_of_column) using btree)') + elif ')' in dic[kw_in_query[1]]: + raise ValueError('Must be: create index name_of_index on name_table(name_of_column) using btree)') + else: + raise ValueError('There is no table and column for an index to be created') + + # + # if (dic['column'] is not None) and (dic['using'] is not None): + # + # dic['using'] = dic[kw_in_query[2]] + + + + #dic['create index'] = dic[kw_in_query[0]] + + # print('Create index') + + return dic @@ -175,9 +211,9 @@ def interpret(query): 'unlock table': ['unlock table', 'force'], 'delete from': ['delete from', 'where'], 'update table': ['update table', 'set', 'where'], - 'create index': ['create index', 'on', 'using'], + 'create index': ['create index', 'on', 'column', 'using'], #add in query: column 'drop index': ['drop index'], - 'create view' : ['create view', 'as'] + 'create view': ['create view', 'as'] } if query[-1]!=';': diff --git a/miniDB/.DS_Store b/miniDB/.DS_Store new file mode 100644 index 00000000..56410cfa Binary files /dev/null and b/miniDB/.DS_Store differ diff --git a/miniDB/algebra_rules.py b/miniDB/algebra_rules.py new file mode 100644 index 00000000..7d36a467 --- /dev/null +++ b/miniDB/algebra_rules.py @@ -0,0 +1,37 @@ +def second_rule(query): + + ''' + rule 2: + σθ1(σθ2(E)) = σθ2(σθ1(E)) + ''' + + newQuery = query + + if 'select' in query.keys() and isinstance(query['from'],dict) and 'select' in query['from'].keys(): + if not (query['distinct'] == False and query['from']['distinct'] == True and query['where'] != + query['from']['where']): + + if query['limit'] == None and query['from']['limit'] == None: + + newQuery = query + newQuery['where'] = 
newQuery['from']['where'] + newQuery['from']['where'] = query['where'] + return [newQuery] + + return [] + + +def fifth_rule(query): + ''' + rule 5: + E1 ⊲⊳θ E2 = E2 ⊲⊳θ E1 + ''' + newQuery = query + if 'join' in query.keys(): + if query['join'] == 'right': + newQuery['join'] = 'left' + elif query['join'] == 'left': + newQuery['join'] = 'right' + newQuery['right'] = query['left'] + newQuery['left'] = query['right'] + return newQuery \ No newline at end of file diff --git a/miniDB/database.py b/miniDB/database.py index a3ac6be7..9b5c7c9b 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -7,6 +7,8 @@ import readline from tabulate import tabulate +from miniDB.hash import Bucket + sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB') from miniDB import table sys.modules['table'] = table @@ -54,7 +56,7 @@ def __init__(self, name, load=True, verbose = True): self.create_table('meta_length', 'table_name,no_of_rows', 'str,int') self.create_table('meta_locks', 'table_name,pid,mode', 'str,int,str') self.create_table('meta_insert_stack', 'table_name,indexes', 'str,list') - self.create_table('meta_indexes', 'table_name,index_name', 'str,str') + self.create_table('meta_indexes', 'table_name,index_name,column_name,index_type', 'str,str,str,str') self.save_database() def save_database(self): @@ -100,8 +102,8 @@ def _update(self): self._update_meta_length() self._update_meta_insert_stack() - - def create_table(self, name, column_names, column_types, primary_key=None, load=None): + # Add unique_columns parameter (is None) + def create_table(self, name, column_names, column_types, primary_key=None, unique_columns=None, load=None): ''' This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name @@ -112,10 +114,16 @@ def create_table(self, name, column_names, column_types, primary_key=None, load= primary_key: string. The primary key (if it exists). load: boolean. 
Defines table object parameters as the name of the table and the column names. ''' - # print('here -> ', column_names.split(',')) - self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)}) + #print('here -> ', column_names.split(',')) + #Check if unique_columns is None or Not + #if unique_columns is None: + # unique_columns_1 = None + #else: + # unique_columns_1 = unique_columns.split(',') + # Add unique_columns + self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, unique_columns=unique_columns, load=load)}) # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load) - # check that new dynamic var doesnt exist already + # check that new dynamic var doesn't exist already # self.no_of_tables += 1 self._update() self.save_database() @@ -123,7 +131,6 @@ def create_table(self, name, column_names, column_types, primary_key=None, load= if self.verbose: print(f'Created table "{name}".') - def drop_table(self, table_name): ''' Drop table from current database. @@ -160,7 +167,7 @@ def drop_table(self, table_name): self.save_database() - def import_table(self, table_name, filename, column_types=None, primary_key=None): + def import_table(self, table_name, filename, column_types=None, primary_key=None, unique_columns=None): ''' Creates table from CSV file. 
@@ -177,7 +184,7 @@ def import_table(self, table_name, filename, column_types=None, primary_key=None colnames = line.strip('\n') if column_types is None: column_types = ",".join(['str' for _ in colnames.split(',')]) - self.create_table(name=table_name, column_names=colnames, column_types=column_types, primary_key=primary_key) + self.create_table(name=table_name, column_names=colnames, column_types=column_types, primary_key=primary_key, unique_columns=unique_columns) lock_ownership = self.lock_table(table_name, mode='x') first_line = False continue @@ -246,7 +253,7 @@ def cast(self, column_name, table_name, cast_type): cast_type: type. Cast type (do not encapsulate in quotes). ''' self.load_database() - + lock_ownership = self.lock_table(table_name, mode='x') self.tables[table_name]._cast_column(column_name, eval(cast_type)) if lock_ownership: @@ -298,7 +305,7 @@ def update_table(self, table_name, set_args, condition): ''' set_column, set_value = set_args.replace(' ','').split('=') self.load_database() - + lock_ownership = self.lock_table(table_name, mode='x') self.tables[table_name]._update_rows(set_value, set_column, condition) if lock_ownership: @@ -319,7 +326,7 @@ def delete_from(self, table_name, condition): Operatores supported: (<,<=,==,>=,>) ''' self.load_database() - + lock_ownership = self.lock_table(table_name, mode='x') deleted = self.tables[table_name]._delete_where(condition) if lock_ownership: @@ -352,27 +359,51 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ distinct: boolean. If True, the resulting table will contain only unique rows. 
''' - # print(table_name) self.load_database() if isinstance(table_name,Table): return table_name._select_where(columns, condition, distinct, order_by, desc, limit) if condition is not None: - condition_column = split_condition(condition)[0] + ''' + if "BETWEEN" in condition.split() or "between" in condition.split(): + condition_column = condition.split(" ")[0] + else: + ''' + if (' and ' in condition or ' AND ' in condition) and (' between ' not in condition or ' BETWEEN ' not in condition): + conditions = tuple(condition.split(' and ')) + elif (' or ' in condition or ' OR ' in condition) and (' between ' not in condition or ' BETWEEN ' not in condition): + conditions = tuple(condition.split(' or ')) + else: + condition_column = split_condition(condition)[0] else: condition_column = '' - + + # self.lock_table(table_name, mode='x') if self.is_locked(table_name): return - if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]: - index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0] - bt = self._load_idx(index_name) - table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) + + if self._has_index(table_name) and (condition_column == self.tables[table_name].pk or condition_column == self.tables[table_name].unique): + selected_table = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True) + for row, t in enumerate(selected_table.column_by_name('index_type')): + if t == "btree": + print("using btree index") + index_name = selected_table.column_by_name('index_name')[row] + bt = self._load_idx(index_name) + table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) + break + elif t == "hash": + print("using hash index") + index_name = selected_table.column_by_name('index_name')[row] + hash = 
self._load_idx(index_name) + table = self.tables[table_name]._select_where_with_hash(columns, hash, condition, distinct, order_by, desc, limit) + break + else: + raise Exception('This table does not have btree or hash index') else: table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) - # self.unlock_table(table_name) + # self.unlock_table(table_name) if save_as is not None: table._name = save_as self.table_from_object(table) @@ -391,7 +422,7 @@ def show_table(self, table_name, no_of_rows=None): table_name: string. Name of table (must be part of database). ''' self.load_database() - + self.tables[table_name].show(no_of_rows, self.is_locked(table_name)) @@ -406,7 +437,7 @@ def sort(self, table_name, column_name, asc=False): ''' self.load_database() - + lock_ownership = self.lock_table(table_name, mode='x') self.tables[table_name]._sort(column_name, asc=asc) if lock_ownership: @@ -444,19 +475,19 @@ def join(self, mode, left_table, right_table, condition, save_as=None, return_ob if self.is_locked(left_table) or self.is_locked(right_table): return - left_table = left_table if isinstance(left_table, Table) else self.tables[left_table] - right_table = right_table if isinstance(right_table, Table) else self.tables[right_table] + left_table = left_table if isinstance(left_table, Table) else self.tables[left_table] + right_table = right_table if isinstance(right_table, Table) else self.tables[right_table] if mode=='inner': res = left_table._inner_join(right_table, condition) - + elif mode=='left': res = left_table._left_join(right_table, condition) - + elif mode=='right': res = left_table._right_join(right_table, condition) - + elif mode=='full': res = left_table._full_join(right_table, condition) @@ -502,7 +533,7 @@ def lock_table(self, table_name, mode='x'): Args: table_name: string. Table name (must be part of database). 
''' - if table_name[:4]=='meta' or table_name not in self.tables.keys() or isinstance(table_name,Table): + if table_name[:4] == 'meta' or table_name not in self.tables.keys() or isinstance(table_name,Table): return with open(f'{self.savedir}/meta_locks.pkl', 'rb') as f: @@ -648,9 +679,9 @@ def _update_meta_insert_stack_for_tb(self, table_name, new_stack): ''' self.tables['meta_insert_stack']._update_rows(new_stack, 'indexes', f'table_name={table_name}') - + # added column_name as argument to recognize the column # indexes - def create_index(self, index_name, table_name, index_type='btree'): + def create_index(self, index_name, table_name, column_name, index_type): ''' Creates an index on a specified table with a given name. Important: An index can only be created on a primary key (the user does not specify the column). @@ -659,21 +690,41 @@ def create_index(self, index_name, table_name, index_type='btree'): table_name: string. Table name (must be part of database). index_name: string. Name of the created index. ''' - if self.tables[table_name].pk_idx is None: # if no primary key, no index - raise Exception('Cannot create index. Table has no primary key.') - if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): - # currently only btree is supported. This can be changed by adding another if. - if index_type=='btree': - logging.info('Creating Btree index.') - # insert a record with the name of the index and the table on which it's created to the meta_indexes table - self.tables['meta_indexes']._insert([table_name, index_name]) - # crate the actual index - self._construct_index(table_name, index_name) - self.save_database() + + # if no primary key and no unique columns no index + if self.tables[table_name].pk_idx is None and self.tables[table_name].unique_idx is None: + raise Exception('Cannot create index. 
Table has neither primary key nor unique columns.') + + if column_name is None: + if self.tables[table_name].pk_idx is None: # if no primary key, no index + raise Exception('Cannot create index. Table has no primary key. You have to add a unique column to create an index.') + else: + column_name = self.tables[table_name].column_names[0] + + if (column_name == self.tables[table_name].pk or column_name == self.tables[table_name].unique): + if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): + if index_type == 'btree': + logging.info('Creating Btree index.') + print('Creating Btree index.') + # insert a record with the name of the index and the table on which it's created to the meta_indexes table + self.tables['meta_indexes']._insert([table_name, index_name, column_name, "btree"]) + # crate the actual index + self._construct_index(table_name, index_name, column_name, "btree") + self.save_database() + elif index_type == 'hash': + logging.info('Creating Hash index.') + print('Creating Hash index.') + self.tables['meta_indexes']._insert([table_name, index_name, column_name, "hash"]) + # insert a record with the name of the index and the table on which it's created to the meta_indexes table self.tables['meta_indexes']._insert([table_name, index_name,column_name]) + # crate the actual index + self._construct_index(table_name, index_name, column_name, "hash") + self.save_database() + else: + raise Exception('Cannot create index. Another index with the same name already exists.') else: - raise Exception('Cannot create index. Another index with the same name already exists.') + raise Exception('Cannot create index on this column as it is not the primary key nor a unique column.') - def _construct_index(self, table_name, index_name): + def _construct_index(self, table_name, index_name, column_name, index_type): ''' Construct a btree on a table and save. @@ -681,15 +732,40 @@ def _construct_index(self, table_name, index_name): table_name: string. 
Table name (must be part of database). index_name: string. Name of the created index. ''' - bt = Btree(3) # 3 is arbitrary - - # for each record in the primary key of the table, insert its value and index to the btree - for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): - if key is None: - continue - bt.insert(key, idx) - # save the btree - self._save_index(index_name, bt) + if index_type == "btree": + bt = Btree(3) # 3 is arbitrary + + # for each record in the primary key of the table, insert its value and index to the btree + # print(self.tables[table_name].column_by_name(self.tables[table_name].pk)) + if column_name is None: + for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): + if key is None: + continue + bt.insert(key, idx) + # Create the index on the specified column that has the UNIQUE constraint + # for each record in the specified column of the table, insert its value and index to the btree + else: + for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)): + if key is None: + continue + bt.insert(key, idx) + # save the btree + self._save_index(index_name, bt) + elif index_type == "hash": + bucket_hashing = Bucket(7) + + if column_name is None: + # for each record in the primary key of the table, insert its value and index to the hash + for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): + if key is None: + continue + bucket_hashing.add(key, idx) + else: # Create the index on the specified column that has the UNIQUE constraint + # for each record in the specified column of the table, insert its value and index to the btree + for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)): + bucket_hashing.add(idx, key) + # save the hash + self._save_index(index_name, bucket_hashing) def _has_index(self, table_name): @@ -745,4 +821,5 @@ def drop_index(self, index_name): 
warnings.warn(f'"{self.savedir}/indexes/meta_{index_name}_index.pkl" not found.') self.save_database() - \ No newline at end of file + + diff --git a/miniDB/hash.py b/miniDB/hash.py new file mode 100644 index 00000000..f3706a5a --- /dev/null +++ b/miniDB/hash.py @@ -0,0 +1,81 @@ +#array_len = 4 +#arxikopoioume to megethos tou array +#values =[None] * array_len +#dimiourgoyme mia lista me ta stoixeia mas + +#def hashing_function(key): +# return hash(key) % len(array_len) + +class Bucket(object): #object + #length = 4 + values = [] + def __init__(self, length): + #arxikopoioume ena keno array + self.values =[" "]*length + + def hash(self, key): + length = len(self.values) + return hash(key) % length + #h sinartisi mas dinei to index gia ena sigkekrimeno str key + + def add(self, key, value): + #tha prosthesoyme sto array mas ena value apo to key tou value + index = self.hash(key) + if self.values[index] is not " ": + for k1 in enumerate(self.values[index]): + if k1[0] == key: + y = list(k1) + y[0] = value + k1 = tuple(y) + break + else: + self.values[index].append([key, value]) + else: + self.values[index] = [] + self.values[index].append([key, value]) + + if self.is_full(): + self.double_bucket() + + def get(self, key): + index = self.hash(key) + + if self.values[index] is " ": + raise KeyError() + + else: + for k1 in enumerate(self.values[index]): + if k1[0] == key: + return k1[1] + + print("This key: " + key + " can not be found") + raise KeyError() + + def is_full(self): + + items = 0 + + for item in self.values: + if item is not None: + items += 1 + return items + 1 == len(self.values) + # boolean : true if items in values are one less than the length of the list + + def double_bucket(self): + length = len(self.values * 2) + h1 = Bucket(length) + for i in range(len(self.values)): + + if self.values[i] is None: + continue + + #h lista mas tora exei to diplasio megethos opote prepei na + # prostethoun ksana oi times + + for k1 in self.values[i]: + h1.add(k1[0], 
k1[1]) + + self.values = h1.values + + def __getitem__(self, key): + return self.get(key) \ No newline at end of file diff --git a/miniDB/misc.py b/miniDB/misc.py index aefada74..e893a385 100644 --- a/miniDB/misc.py +++ b/miniDB/misc.py @@ -1,50 +1,97 @@ import operator + def get_op(op, a, b): ''' Get op as a function of a and b by using a symbol ''' - ops = {'>': operator.gt, - '<': operator.lt, - '>=': operator.ge, - '<=': operator.le, - '=': operator.eq} + ops = {'>=': operator.ge, + '<=': operator.le, + '!=': operator.ne, + '=': operator.eq, + '>': operator.gt, + '<': operator.lt, + 'between': between, + 'BETWEEN': between + } try: - return ops[op](a,b) - except TypeError: # if a or b is None (deleted record), python3 raises typerror + return ops[op](a, b) + except TypeError: # if a or b is None (deleted record), python3 raises typeError return False def split_condition(condition): ops = {'>=': operator.ge, '<=': operator.le, + '!=': operator.ne, '=': operator.eq, '>': operator.gt, - '<': operator.lt} + '<': operator.lt, + 'between': between, + 'BETWEEN': between + } for op_key in ops.keys(): - splt=condition.split(op_key) - if len(splt)>1: - left, right = splt[0].strip(), splt[1].strip() + if (op_key == '>=' or op_key == '<=' or op_key == '!=' or op_key == '=' or op_key == '>' or op_key == '<') and (op_key in condition): + splt = condition.split(op_key) + if len(splt) > 1: + left, right = splt[0].strip(), splt[1].strip() + if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them. + right = right.strip('"') + elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw. + raise ValueError( + f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.') + if right.find('"') != -1: # If there are any double quotes in the value, throw. 
(Notice we've already removed the leading and trailing ones) + raise ValueError( + f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.') + return left, op_key, right + elif (op_key == 'between' or op_key == 'BETWEEN') and (op_key in condition): + split_cond1 = condition.split('between') + try: + if " and " not in split_cond1[1]: + raise ValueError('The query BETWEEN needs the operator and to separate the arguments. Try again!') + values = tuple((split_cond1[1].strip()).split(' and ')) + column_cond = split_cond1[0].strip() + left = column_cond + right = values + op_key = 'between' + return left, op_key, right + except: + raise ValueError('A where query with the BETWEEN operator has the following format:' + '... where column_name between/BETWEEN value1 and value2. Try again!') - if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them. - right = right.strip('"') - elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw. - raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.') - if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones) - raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.') +def between(a, b): + a = str(a) + value1, value2 = b + # between statement for strings + if type(value1) == str and type(value2) == str and not value1.isdigit() and not value2.isdigit() and not a.isdigit(): + return a if value1 <= a <= value2 else None + # between statement for numbers + elif value1.isdigit() and value2.isdigit() and a.isdigit(): + value1 = float(value1) + value2 = float(value2) + a = float(a) + return a if value1 <= a <= value2 else None + else: + raise ValueError('You are trying to compare numbers and characters. 
Try again!') - return left, op_key, right +# updated reverse_op method, to include all operators and their opposites def reverse_op(op): ''' Reverse the operator given ''' return { - '>' : '<', - '>=' : '<=', - '<' : '>', - '<=' : '>=', - '=' : '=' + '>=': '<', + '<=': '>', + '!=': '=', + '=': '!=', + '>': '<=', + '<': '>=' }.get(op) + + + + + diff --git a/miniDB/table.py b/miniDB/table.py index f5c7d937..d2d42306 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -6,7 +6,7 @@ sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB') -from misc import get_op, split_condition +from misc import get_op, split_condition, reverse_op class Table: @@ -26,7 +26,7 @@ class Table: - a dictionary that includes the appropriate info (all the attributes in __init__) ''' - def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None): + def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, unique_columns=None, load=None): if load is not None: # if load is a dict, replace the object dict with it (replaces the object with the specified one) @@ -66,9 +66,13 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= self.pk_idx = self.column_names.index(primary_key) else: self.pk_idx = None - self.pk = primary_key - # self._update() + + if unique_columns is not None: + self.unique_idx = self.column_names.index(unique_columns) + else: + self.unique_idx = None + self.unique = unique_columns # if any of the name, columns_names and column types are none. 
return an empty table object @@ -130,6 +134,10 @@ def _insert(self, row, insert_stack=[]): elif i==self.pk_idx and row[i] is None: raise ValueError(f'ERROR -> The value of the primary key cannot be None.') + # if value is to be appended to a unique column, check that it doesn't alrady exist (no duplicate values in unique columns) + if i==self.unique_idx and row[i] in self.column_by_name(self.unique): + raise ValueError(f'## ERROR -> Value {row[i]} already exists in unique key column.') + # if insert_stack is not empty, append to its last index if insert_stack != []: self.data[insert_stack[-1]] = row @@ -167,7 +175,6 @@ def _update_rows(self, set_value, set_column, condition): # self._update() # print(f"Updated {len(indexes_to_del)} rows") - def _delete_where(self, condition): ''' Deletes rows where condition is met. @@ -182,17 +189,29 @@ def _delete_where(self, condition): Operatores supported: (<,<=,==,>=,>) ''' - column_name, operator, value = self._parse_condition(condition) - indexes_to_del = [] + if condition is not None: + if " and " in condition or " AND " in condition or " or " in condition or " OR " in condition: + if " between " in condition or " BETWEEN " in condition: + has_between = True + if " and " in condition or " AND " in condition: + indexes_to_del = self._and_parse_condition(condition, has_between) + elif " or " in condition or " OR " in condition and " and " not in condition and " AND " not in condition: + indexes_to_del = self._or_parse_condition(condition, has_between) + else: + has_between = False + if " and " in condition or " AND " in condition: + indexes_to_del = self._and_parse_condition(condition, has_between) + elif " or " in condition or " OR " in condition and " and " not in condition and " AND " not in condition: + indexes_to_del = self._or_parse_condition(condition, has_between) + else: + column_name, operator, value = self._parse_condition(condition) + column = self.column_by_name(column_name) + indexes_to_del = [ind for ind, x in 
enumerate(column) if get_op(operator, x, value)] - column = self.column_by_name(column_name) - for index, row_value in enumerate(column): - if get_op(operator, row_value, value): - indexes_to_del.append(index) # we pop from highest to lowest index in order to avoid removing the wrong item - # since we dont delete, we dont have to to pop in that order, but since delete is used + # since we don't delete, we don't have to pop in that order, but since delete is used # to delete from meta tables too, we still implement it. for index in sorted(indexes_to_del, reverse=True): @@ -206,7 +225,6 @@ def _delete_where(self, condition): # we have to return the deleted indexes, since they will be appended to the insert_stack return indexes_to_del - def _select_where(self, return_columns, condition=None, distinct=False, order_by=None, desc=True, limit=None): ''' Select and return a table containing specified columns and rows where condition is met. @@ -217,7 +235,7 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by 'column[<,<=,==,>=,>]value' or 'value[<,<=,==,>=,>]column'. - Operatores supported: (<,<=,==,>=,>) + Operators supported: (<,<=,==,>=,>) distinct: boolean. If True, the resulting table will contain only unique rows (False by default). order_by: string. A column name that signals that the resulting table should be ordered based on it (no order if None). desc: boolean. If True, order_by will return results in descending order (False by default). 
@@ -229,13 +247,28 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by return_cols = [i for i in range(len(self.column_names))] else: return_cols = [self.column_names.index(col.strip()) for col in return_columns.split(',')] - # if condition is None, return all rows # if not, return the rows with values where condition is met for value if condition is not None: - column_name, operator, value = self._parse_condition(condition) - column = self.column_by_name(column_name) - rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + if " and " in condition or " AND " in condition or " or " in condition or " OR " in condition: + if " between " in condition or " BETWEEN " in condition: + has_between = True + if " and " in condition or " AND " in condition: + rows = self._and_parse_condition(condition, has_between) + elif " or " in condition or " OR " in condition and " and " not in condition and " AND " not in condition: + rows = self._or_parse_condition(condition, has_between) + else: + has_between = False + if " and " in condition or " AND " in condition: + rows = self._and_parse_condition(condition, has_between) + elif " or " in condition or " OR " in condition and " and " not in condition and " AND " not in condition: + rows = self._or_parse_condition(condition, has_between) + else: + column_name, operator, value = self._parse_condition(condition) + column = self.column_by_name(column_name) + rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + + else: rows = [i for i in range(len(self.data))] @@ -245,7 +278,7 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # we need to set the new column names/types and no of columns, since we might # only return some columns dict['column_names'] = [self.column_names[i] for i in return_cols] - dict['column_types'] = [self.column_types[i] for i in return_cols] + dict['column_types'] = [self.column_types[i] for i in 
return_cols] s_table = Table(load=dict) @@ -281,9 +314,10 @@ def _select_where_with_btree(self, return_columns, bt, condition, distinct=False column_name, operator, value = self._parse_condition(condition) + # if the column in condition is not a primary key, abort the select - if column_name != self.column_names[self.pk_idx]: - print('Column is not PK. Aborting') + if column_name != self.column_names[self.pk_idx] or column_name != self.column_names[self.unique_idx]: + print('Column is not PK or unique. Aborting') # here we run the same select twice, sequentially and using the btree. # we then check the results match and compare performance (number of operation) @@ -297,6 +331,9 @@ def _select_where_with_btree(self, return_columns, bt, condition, distinct=False if get_op(operator, x, value): rows1.append(ind) + print("using btree index for select") + + # btree find rows = bt.find(operator, value) @@ -324,6 +361,60 @@ def _select_where_with_btree(self, return_columns, bt, condition, distinct=False return s_table + + def _select_where_with_hash(self, return_columns, eh, condition, distinct=False, order_by=None, desc=True, + limit=None): + # if * return all columns, else find the column indexes for the columns specified + if return_columns == '*': + return_cols = [i for i in range(len(self.column_names))] + else: + return_cols = [self.column_names.index(colname) for colname in return_columns] + + column_name, operator, value = self._parse_condition(condition) + + # if the column in condition is not a primary key, abort the select + if column_name != self.column_names[self.pk_idx] or column_name != self.column_names[self.unique_idx]: + print('Column is not PK or unique. 
Aborting') + + rows = [] + # Check if it is a range query that is not supported by Hash index + if (operator == '<' or operator == '>' or operator == '<=' or operator == '>='): + # Sequential search + column = self.column_by_name(column_name) + opsseq = 0 + for ind, x in enumerate(column): + opsseq += 1 + if get_op(operator, x, value): + rows.append(ind) + else: + # If the query is point query + idx = eh.get(value) # Find the index of the row of the column that is equal to value + rows.append(idx) + print("using hash index for select") + + try: + k = int(limit) + except TypeError: + k = None + # same as simple select from now on + rows = rows[:k] + dict = {(key): ([[self.data[i][j] for j in return_cols] for i in rows] if key == "data" else value) for + key, value + in self.__dict__.items()} + + dict['column_names'] = [self.column_names[i] for i in return_cols] + dict['column_types'] = [self.column_types[i] for i in return_cols] + + s_table = Table(load=dict) + + s_table.data = list(set(map(lambda x: tuple(x), s_table.data))) if distinct else s_table.data + + if order_by: + s_table.order_by(order_by, desc) + + return s_table + + def order_by(self, column_name, desc=True): ''' Order table based on column. 
@@ -528,18 +619,20 @@ def show(self, no_of_rows=None, is_locked=False): else: output += f"\n## {self._name} ##\n" - # headers -> "column name (column type)" + #headers -> "column name (column type)" headers = [f'{col} ({tp.__name__})' for col, tp in zip(self.column_names, self.column_types)] if self.pk_idx is not None: # table has a primary key, add PK next to the appropriate column headers[self.pk_idx] = headers[self.pk_idx]+' #PK#' + if self.unique_idx is not None: + # table has a unique column, add UNIQUE next to the appropriate column + headers[self.unique_idx] = headers[self.unique_idx]+' #UNIQUE#' # detect the rows that are no tfull of nones (these rows have been deleted) - # if we dont skip these rows, the returning table has empty rows at the deleted positions + # if we don't skip these rows, the returning table has empty rows at the deleted positions non_none_rows = [row for row in self.data if any(row)] # print using tabulate print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n') - def _parse_condition(self, condition, join=False): ''' Parse the single string condition and return the value of the column and the operator. @@ -547,23 +640,92 @@ def _parse_condition(self, condition, join=False): Args: condition: string. A condition using the following format: 'column[<,<=,==,>=,>]value' or - 'value[<,<=,==,>=,>]column'. - - Operatores supported: (<,<=,==,>=,>) + 'value[<,<=,==,>=,>]column' or + 'column [between,BETWEEN] value1 and value2'. + + Operators supported: (<,<=,==,>=,>,between) join: boolean. Whether to join or not (False by default). 
''' + # if both_columns (used by the join function) return the names of the names of the columns (left first) if join: return split_condition(condition) + if 'not' in condition or 'NOT' in condition: + condition = (condition.split('not '))[1] + left, op, right = split_condition(condition) + operator = reverse_op(op) + coltype = self.column_types[self.column_names.index(left)] + return left, operator, coltype(right) + # cast the value with the specified column's type and return the column name, the operator and the casted value left, op, right = split_condition(condition) + if left not in self.column_names: raise ValueError(f'Condition is not valid (cant find column name)') - coltype = self.column_types[self.column_names.index(left)] + if op == 'between': + return left, op, right + else: + coltype = self.column_types[self.column_names.index(left)] + return left, op, coltype(right) + + + def _or_parse_condition(self, condition, has_between=False): + rows_of_or = [] + or_conditions_list = condition.split(" or ") + for cond_or in or_conditions_list[:]: + if has_between: + ind = or_conditions_list.index(cond_or) + column_name, operator, value = self._parse_condition(cond_or) + column = self.column_by_name(column_name) + rows = [ind_loop for ind_loop, x in enumerate(column) if get_op(operator, x, value)] + rows_of_or.append(rows) + del or_conditions_list[ind] + else: + ind = or_conditions_list.index(cond_or) + column_name, operator, value = self._parse_condition(cond_or) + column = self.column_by_name(column_name) + rows = [ind_loop for ind_loop, x in enumerate(column) if get_op(operator, x, value)] + rows_of_or.append(rows) + del or_conditions_list[ind] + + r = list(set.union(*[set(x) for x in rows_of_or])) + return r + + + def _and_parse_condition(self, condition, has_between=False): + rows_of_and = [] + and_conditions_list = condition.split(" and ") + between_index = [x for x, con in enumerate(and_conditions_list) if ' between ' in con or ' BETWEEN ' in con] + if 
has_between: + between_index = between_index[0] + between_condition = [' and '.join(and_conditions_list[between_index: (between_index + 2)])] + between_condition = between_condition[0] + del and_conditions_list[between_index] + del and_conditions_list[between_index] + and_conditions_list.insert(between_index, between_condition) + column_name, operator, value = self._parse_condition(between_condition) + column = self.column_by_name(column_name) + rows_of_between = [ind_loop for ind_loop, x in enumerate(column) if get_op(operator, x, value)] + rows_of_and.append(rows_of_between) + del and_conditions_list[between_index] + + for cond in and_conditions_list[:]: + if ' or ' in cond or ' OR ' in cond: + or_rows_returned = self._or_parse_condition(cond) + rows_of_and.append(or_rows_returned) + + else: + ind = and_conditions_list.index(cond) + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + rows = [ind_loop for ind_loop, x in enumerate(column) if get_op(operator, x, value)] + rows_of_and.append(rows) + del and_conditions_list[ind] - return left, op, coltype(right) + k = list(set.intersection(*[set(x) for x in rows_of_and])) + return k def _load_from_file(self, filename): ''' diff --git a/sql_files/smallRelationsInsertFile.sql b/sql_files/smallRelationsInsertFile.sql index d05d81b9..5322a8f0 100644 --- a/sql_files/smallRelationsInsertFile.sql +++ b/sql_files/smallRelationsInsertFile.sql @@ -9,6 +9,7 @@ create table takes (ID str, course_id str, sec_id str, semester str, year int, g create table advisor (s_ID str primary key, i_ID str); create table time_slot (time_slot_id str, day str, start_hr int, start_min int, end_hr str, end_min str); create table prereq (course_id str, prereq_id str); +create table schedule (day_time str unique, course str); insert into classroom values (Packard,101,500); insert into classroom values (Painter,514,10); insert into classroom values (Taylor,3128,70); @@ -146,4 +147,11 @@ insert into 
prereq values (CS-190,CS-101); insert into prereq values (CS-315,CS-101); insert into prereq values (CS-319,CS-101); insert into prereq values (CS-347,CS-101); -insert into prereq values (EE-181,PHY-101); \ No newline at end of file +insert into prereq values (EE-181,PHY-101); +insert into schedule values (Monday 8:00 - 9:00,genetics); +insert into schedule values (Monday 9:00 - 10:00,intro_to_biology); +insert into schedule values (Tuesday 10:00 - 11:00,computational biology); +insert into schedule values (Wednesday 11:00 - 12:00,robotics); +insert into schedule values (Thursday 12:00 - 13:00,music video production); +insert into schedule values (Friday 12:00 - 13:00,game design); +insert into schedule values (Friday 14:00 - 16:00,image processing); \ No newline at end of file