diff --git a/mdb.py b/mdb.py index a981e5be..817115d6 100644 --- a/mdb.py +++ b/mdb.py @@ -7,8 +7,8 @@ import shutil sys.path.append('miniDB') -from database import Database -from table import Table +from miniDB.database import Database +from miniDB.table import Table # art font is "big" art = ''' _ _ _____ ____ @@ -93,20 +93,27 @@ def create_query_plan(query, keywords, action): else: dic['desc'] = None - if action=='create table': - args = dic['create table'][dic['create table'].index('('):dic['create table'].index(')')+1] - dic['create table'] = dic['create table'].removesuffix(args).strip() - arg_nopk = args.replace('primary key', '')[1:-1] - arglist = [val.strip().split(' ') for val in arg_nopk.split(',')] - dic['column_names'] = ','.join([val[0] for val in arglist]) - dic['column_types'] = ','.join([val[1] for val in arglist]) - if 'primary key' in args: - arglist = args[1:-1].split(' ') - dic['primary key'] = arglist[arglist.index('primary')-2] - else: - dic['primary key'] = None - - if action=='import': + if action == 'create table': + args = dic['create table'][dic['create table'].index('('):dic['create table'].index(')')+1] + dic['create table'] = dic['create table'].removesuffix(args).strip() + arg_nopk = args.replace('primary key', '')[1:-1] + arglist = [val.strip().split(' ') for val in arg_nopk.split(',')] + dic['column_names'] = ','.join([val[0] for val in arglist]) + dic['column_types'] = ','.join([val[1] for val in arglist]) + if 'primary key' in args: + arglist = args[1:-1].split(' ') + dic['primary key'] = arglist[arglist.index('primary')-2] + else: + dic['primary key'] = None + + if 'unique' in args: + arglist= args[1:-1].split(' ') + dic['unique'] = arglist[arglist.index('unique') - 2] + + else: + dic['column'] = None + + if action == 'import': dic = {'import table' if key=='import' else key: val for key, val in dic.items()} if action=='insert into': @@ -115,12 +122,17 @@ def create_query_plan(query, keywords, action): else: raise ValueError('Your parens are not right m8') - if action=='unlock table': + if action == 'unlock table': if dic['force'] is not None: dic['force'] = True else: dic['force'] = False + if action == 'create index': # created action 'creat index' + dic['column name'] = dic['on'][dic['on'].index('(')+1:-1] + dic['column name'] = dic['column name'].strip() # name of column where index is going to be created + dic['on'] = dic['on'][0:dic['on'].index('(')] # name of table where index is going to be created + return dic @@ -164,6 +176,8 @@ def interpret(query): ''' Interpret the query. ''' + #dictionary + # hi kw_per_action = {'create table': ['create table'], 'drop table': ['drop table'], 'cast': ['cast', 'from', 'to'], @@ -282,7 +296,7 @@ def remove_db(db_name): print('\nbye!') break try: - if line=='exit': + if line == 'exit': break if line.split(' ')[0].removesuffix(';') in ['lsdb', 'lstb', 'cdb', 'rmdb']: interpret_meta(line) diff --git a/miniDB/database.py b/miniDB/database.py index a3ac6be7..b8d67118 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -54,7 +54,7 @@ def __init__(self, name, load=True, verbose = True): self.create_table('meta_length', 'table_name,no_of_rows', 'str,int') self.create_table('meta_locks', 'table_name,pid,mode', 'str,int,str') self.create_table('meta_insert_stack', 'table_name,indexes', 'str,list') - self.create_table('meta_indexes', 'table_name,index_name', 'str,str') + self.create_table('meta_indexes', 'table_name,index_name,column_name', 'str,str,str') # added column_name self.save_database() def save_database(self): @@ -101,7 +101,7 @@ def _update(self): self._update_meta_insert_stack() - def create_table(self, name, column_names, column_types, primary_key=None, load=None): + def create_table(self, name, column_names, column_types, primary_key=None,unique=None, load=None): ''' This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name @@ -110,10 +110,18 @@ def create_table(self, name, column_names, column_types, primary_key=None, load= column_names: list. Names of columns. column_types: list. Types of columns. primary_key: string. The primary key (if it exists). + unique : list. List of unique columns (if any exist). load: boolean. Defines table object parameters as the name of the table and the column names. ''' # print('here -> ', column_names.split(',')) - self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)}) + if unique is not None: + self.tables.update({name: Table(name=name, column_names=column_names.split(','), + column_types=column_types.split(','), primary_key=primary_key, + load=load,unique=unique.split(','))}) + else: + self.tables.update({name: Table(name=name, column_names=column_names.split(','), + column_types=column_types.split(','), primary_key=primary_key, + load=load,unique=None )}) # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load) # check that new dynamic var doesnt exist already # self.no_of_tables += 1 @@ -331,7 +339,7 @@ def delete_from(self, table_name, condition): self._add_to_insert_stack(table_name, deleted) self.save_database() - def select(self, columns, table_name, condition, distinct=None, order_by=None, \ + def select(self, columns, table_name, condition, distinct=None, order_by=None, limit=True, desc=None, save_as=None, return_object=True): ''' Selects and outputs a table's data where condtion is met. @@ -358,10 +366,17 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ return table_name._select_where(columns, condition, distinct, order_by, desc, limit) if condition is not None: - condition_column = split_condition(condition)[0] + if "not" in condition.split() or "NOT" in condition.split(): + condition_column=condition.split(" ")[0] + elif "and" in condition.split() or "AND" in condition.split(): + condition_column=condition.split(" ")[0] + elif "or" in condition.split() or "OR" in condition.split(): + condition_column=condition.split(" ")[0] + else: + condition_column = split_condition(condition)[0] + else: condition_column = '' - # self.lock_table(table_name, mode='x') if self.is_locked(table_name): @@ -648,9 +663,8 @@ def _update_meta_insert_stack_for_tb(self, table_name, new_stack): ''' self.tables['meta_insert_stack']._update_rows(new_stack, 'indexes', f'table_name={table_name}') - # indexes - def create_index(self, index_name, table_name, index_type='btree'): + def create_index(self, index_name, table_name, index_type = 'btree',column_name = None): ''' Creates an index on a specified table with a given name. Important: An index can only be created on a primary key (the user does not specify the column). @@ -658,22 +672,39 @@ def create_index(self, index_name, table_name, index_type='btree'): Args: table_name: string. Table name (must be part of database). index_name: string. Name of the created index. + column_name: string. Name of column, on which the index is created. ''' - if self.tables[table_name].pk_idx is None: # if no primary key, no index - raise Exception('Cannot create index. Table has no primary key.') + table_name=table_name.strip() + if table_name not in self.tables: # Checking if table exists + raise Exception('Table does not exist') + if self.tables[table_name].unique[0] not in self.tables[table_name].column_names: # checking if column exists + raise Exception('Column does not exist') + if column_name not in self.tables[table_name].unique and column_name!=self.tables[table_name].pk: #checking if column is unique and if column is PK + raise Exception('Column is not unique') + if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): - # currently only btree is supported. This can be changed by adding another if. - if index_type=='btree': - logging.info('Creating Btree index.') - # insert a record with the name of the index and the table on which it's created to the meta_indexes table - self.tables['meta_indexes']._insert([table_name, index_name]) - # crate the actual index - self._construct_index(table_name, index_name) + + if index_type == 'btree': + if self.tables[table_name].pk == column_name or column_name in self.tables[table_name].unique: + logging.info('Creating Btree index.') + # insert a record with the name of the index and the table on which it's created to the meta_indexes table + self.tables['meta_indexes']._insert([table_name, index_name,column_name]) + # crate the actual index + self._construct_index(table_name, index_name,index_type, column_name) + self.save_database() + print('Btree creation done!') + + elif index_type=='hash': + logging.info('Creating Hash index.') + self.tables['meta_indexes']._insert([table_name, index_name, column_name]) + self._construct_index(table_name, index_name, index_type, column_name) self.save_database() + print('Hash creation done!') + else: raise Exception('Cannot create index. Another index with the same name already exists.') - def _construct_index(self, table_name, index_name): + def _construct_index(self, table_name, index_name, index_type,column_name=None): ''' Construct a btree on a table and save. @@ -681,16 +712,24 @@ def _construct_index(self, table_name, index_name): table_name: string. Table name (must be part of database). index_name: string. Name of the created index. ''' - bt = Btree(3) # 3 is arbitrary + if index_type=='btree': + bt = Btree(3) # 3 is arbitrary - # for each record in the primary key of the table, insert its value and index to the btree - for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): + # for each record in the primary key of the table, insert its value and index to the btree + for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)): if key is None: continue bt.insert(key, idx) - # save the btree - self._save_index(index_name, bt) - + # save the btree + self._save_index(index_name, bt) + + if index_type=='hash': # commented out because class Hash() does not exist + # h=Hash() + for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)): + if key is None: + continue + # h.insert(key, idx) + # self._save_index(index_name, h) def _has_index(self, table_name): ''' diff --git a/miniDB/misc.py b/miniDB/misc.py index aefada74..db3fdcb7 100644 --- a/miniDB/misc.py +++ b/miniDB/misc.py @@ -15,6 +15,7 @@ def get_op(op, a, b): except TypeError: # if a or b is None (deleted record), python3 raises typerror return False + def split_condition(condition): ops = {'>=': operator.ge, '<=': operator.le, @@ -22,20 +23,22 @@ def split_condition(condition): '>': operator.gt, '<': operator.lt} + for op_key in ops.keys(): - splt=condition.split(op_key) - if len(splt)>1: - left, right = splt[0].strip(), splt[1].strip() + splt1=condition.split(op_key) + if len(splt1) > 1: + left, right = splt1[0].strip(), splt1[1].strip() + between_exists = False - if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them. - right = right.strip('"') - elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw. - raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.') + if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them. + right = right.strip('"') + elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw. + raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.') - if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones) - raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.') + if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones) + raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.') - return left, op_key, right + return left, op_key, right def reverse_op(op): ''' diff --git a/miniDB/table.py b/miniDB/table.py index f5c7d937..b5bfede9 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -6,7 +6,7 @@ sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB') -from misc import get_op, split_condition +from misc import get_op, split_condition, reverse_op class Table: @@ -26,7 +26,7 @@ class Table: - a dictionary that includes the appropriate info (all the attributes in __init__) ''' - def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None): + def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None,unique=None): if load is not None: # if load is a dict, replace the object dict with it (replaces the object with the specified one) @@ -67,6 +67,11 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= else: self.pk_idx = None + if unique is not None: + self.unique = unique + else: + self.unique = None + self.pk = primary_key # self._update() @@ -233,13 +238,80 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # if condition is None, return all rows # if not, return the rows with values where condition is met for value if condition is not None: - column_name, operator, value = self._parse_condition(condition) - column = self.column_by_name(column_name) - rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + rows = [] + # if condition contains string "between" + if "between" in condition.split() : + splt = condition.split() + column_name = splt[0] # saving column name given by user + min_value = splt[2] # saving number before the word 'and' in the condition + max_value = splt[4] # saving number after the word 'and' in the condition + column = self.column_by_name(column_name) + if min_value.isdigit() and max_value.isdigit(): # checking if given values are numbers + if all([isinstance(x,int) for x in column]): # checking if column contains integers + for i, j in enumerate(column): + if int(min_value) <= int(j) <= int(max_value): # checking if each column value is between minimum and maximum values + rows.append(i) # appending number of row to list rows + + else: + raise Exception("Column does not contain integers") + else: + raise Exception("You must enter integers. For example: between 1 and 10") + + # if condition contains string "not" + elif "not" in condition.split(): + splt = condition.split("not") + cond = splt[1] + column_name, operator, value = self._parse_condition(cond) + column = self.column_by_name(column_name) + reversed_operator = reverse_op(operator) # reversing operator + if reversed_operator == '=': + for i,j in enumerate(column): + if j!=value: # adding a row only if given value is not equal to the value of the row's column + rows.append(i) + + else: + rows = [ind for ind, x in enumerate(column) if get_op(reversed_operator, x, value)] + + # if condition contains string "and" + elif "and" in condition.split(): + splt= condition.split("and") # splitting condition on string "and" + iteration = 0 + for conditions in splt: + column_name, operator, value = self._parse_condition(conditions) + column = self.column_by_name(column_name) + list_of_rows= [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + if iteration == 0: # runs only for first condition given + r = list_of_rows + else: + r = set(r).intersection(list_of_rows) # keeping only the items that exist both in list r and list list_of_rows + iteration += 1 + rows = r + + # if condition contains string "or" + elif "or" in condition.split(): + splt = condition.split("or") # splitting condition on string "and" + list_of_rows=[] + for conditions in splt: + column_name, operator, value = self._parse_condition(conditions) + column = self.column_by_name(column_name) + list_of_rows.append([ind for ind, x in enumerate(column) if get_op(operator, x, value)]) # appending every no. of row to list_of_rows + + for row in list_of_rows: + for r in row: + if r not in rows: # avoiding duplicates + rows.append(r) + + else: + column_name, operator, value = self._parse_condition(condition) + column = self.column_by_name(column_name) + rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + + else: rows = [i for i in range(len(self.data))] - # copy the old dict, but only the rows and columns of data with index in rows/columns (the indexes that we want returned) + # copy the old dict, but only the rows and columns of data with index in rows/columns (the indexes that we + # want returned) dict = {(key):([[self.data[i][j] for j in return_cols] for i in rows] if key=="data" else value) for key,value in self.__dict__.items()} # we need to set the new column names/types and no of columns, since we might @@ -565,6 +637,7 @@ def _parse_condition(self, condition, join=False): return left, op, coltype(right) + def _load_from_file(self, filename): ''' Load table from a pkl file (not used currently). diff --git a/sql_files/smallRelationsInsertFile.sql b/sql_files/smallRelationsInsertFile.sql index d05d81b9..cb582bfd 100644 --- a/sql_files/smallRelationsInsertFile.sql +++ b/sql_files/smallRelationsInsertFile.sql @@ -1,6 +1,6 @@ create table classroom (building str, room_number str, capacity int); create table department (dept_name str primary key, building str, budget int); -create table course (course_id str primary key, title str, dept_name str, credits int); +create table course (course_id str primary key, title str, dept_name str, credits int, unique(title)); create table instructor (ID str primary key, name str, dept_name str, salary int); create table section (course_id str, sec_id str, semester str, year int, building str, room_number str, time_slot_id str); create table teaches (ID str, course_id str, sec_id str, semester str, year int);