diff --git a/courseProjectCode/Metrics/comment_density.py b/courseProjectCode/Metrics/comment_density.py
new file mode 100644
index 000000000000..ca6a0b5a9ac7
--- /dev/null
+++ b/courseProjectCode/Metrics/comment_density.py
@@ -0,0 +1,109 @@
import os


def comment_density():
    """Compute and print the comment density of the data_structures directory."""
    # Path to the data_structures directory, two levels above this file
    data_structures_path = os.path.join(os.path.dirname(__file__), '..', '..', 'data_structures')

    if not os.path.exists(data_structures_path):
        print(f"Error: data_structures directory not found at {data_structures_path}")
        return

    total_lines = 0
    comment_lines = 0
    blank_lines = 0
    files_processed = 0

    for root, _dirs, files in os.walk(data_structures_path):
        for file in files:
            if not file.endswith('.py'):
                continue
            file_path = os.path.join(root, file)

            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    lines = f.readlines()
            except OSError:
                continue

            file_total = len(lines)
            file_comments = 0
            file_blanks = 0

            in_multiline_comment = False
            multiline_delimiter = None

            for line in lines:
                stripped_line = line.strip()

                if not stripped_line:
                    file_blanks += 1
                    continue

                # Single-line comments
                if stripped_line.startswith('#'):
                    file_comments += 1
                    continue

                # Multi-line comments: triple-quoted strings (including
                # docstrings) are counted as comment lines
                line_is_comment = False
                temp_line = line

                while True:
                    if not in_multiline_comment:
                        # Look for the start of a triple-quoted block
                        triple_double_pos = temp_line.find('"""')
                        triple_single_pos = temp_line.find("'''")

                        if triple_double_pos != -1 and (triple_single_pos == -1 or triple_double_pos < triple_single_pos):
                            in_multiline_comment = True
                            multiline_delimiter = '"""'
                            line_is_comment = True
                            temp_line = temp_line[triple_double_pos + 3:]
                        elif triple_single_pos != -1:
                            in_multiline_comment = True
                            multiline_delimiter = "'''"
                            line_is_comment = True
                            temp_line = temp_line[triple_single_pos + 3:]
                        else:
                            break
                    else:
                        # Inside a triple-quoted block: look for its closing delimiter
                        line_is_comment = True
                        end_pos = temp_line.find(multiline_delimiter)
                        if end_pos != -1:
                            in_multiline_comment = False
                            multiline_delimiter = None
                            temp_line = temp_line[end_pos + 3:]
                        else:
                            break

                if line_is_comment:
                    file_comments += 1

            total_lines += file_total
            comment_lines += file_comments
            blank_lines += file_blanks
            files_processed += 1

    code_lines = total_lines - blank_lines - comment_lines

    # Comment density: comment lines as a percentage of non-blank lines
    non_blank_lines = total_lines - blank_lines
    density = (comment_lines / non_blank_lines) * 100 if non_blank_lines > 0 else 0.0

    print(f"Files processed: {files_processed}")
    print(f"Code lines: {code_lines}")
    print(f"Comment lines: {comment_lines}")
    print(f"Comment density: {density:.2f}%")


if __name__ == "__main__":
    comment_density()
diff --git a/courseProjectDocs/project-proposal.md b/courseProjectDocs/project-proposal.md
new file mode 100644
index 000000000000..8dda681328fb
--- /dev/null
+++ b/courseProjectDocs/project-proposal.md
@@ -0,0 +1,64 @@
# Project Proposal

## Project Overview

[TheAlgorithms/Python](https://github.com/TheAlgorithms/Python) is an open-source repository for learning, practicing, and understanding algorithms in Python. It offers a curated collection of algorithm implementations that serves as a reference, an educational resource, and a practical toolkit for students and developers alike. It covers a wide range of domains, including blockchain, cryptography, data compression, data structures, linear algebra, and more.
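As a point of reference for the line counts reported in the next section, a per-section tally can be reproduced with a short script. The sketch below is illustrative only (it assumes the repository layout described above and counts non-blank lines); it is not the submitted metrics code:

```python
import os


def count_lines_per_section(base: str = "data_structures") -> dict[str, int]:
    """Tally non-blank lines of Python code per top-level subdirectory (illustrative sketch)."""
    counts: dict[str, int] = {}
    for root, _dirs, files in os.walk(base):
        # Bucket each file under its top-level section (e.g., "stacks", "queues");
        # files sitting directly under `base` are bucketed under `base` itself
        rel = os.path.relpath(root, base)
        section = base if rel == "." else rel.split(os.sep)[0]
        for name in files:
            if not name.endswith(".py"):
                continue
            with open(os.path.join(root, name), encoding="utf-8", errors="ignore") as f:
                counts[section] = counts.get(section, 0) + sum(1 for line in f if line.strip())
    return counts


if __name__ == "__main__":
    for section, count in sorted(count_lines_per_section().items()):
        print(f"{section}: {count}")
```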
## Key Quality Metrics

For the purpose of this assignment, we focus on the **[Data Structures](https://github.com/SWEN-777/TheAlgorithms-Python/tree/master/data_structures)** directory and evaluate its key quality metrics.

### Code Structure

#### Lines of Code

Data Structures directory:

| Section | Lines |
|---------------|-------|
| Arrays | 871 |
| Binary Tree | 4992 |
| Disjoint Set | 129 |
| Hashing | 881 |
| Heap | 1310 |
| KD Tree | 275 |
| Linked List | 2611 |
| Queues | 1246 |
| Stacks | 1321 |
| Suffix Tree | 165 |
| Trie | 289 |
| **Total** | **14090** |

#### Comment Density

Comment lines: 7160

Comment density: 50.82% (7160 comment lines out of the 14090 non-blank lines counted above)

> Note: See the comment density script [here](https://github.com/SWEN-777/TheAlgorithms-Python/blob/master/courseProjectCode/Metrics/comment_density.py)

### Testability

#### Number of Unit Test Cases

While many modules include inline usage examples, the table below counts the dedicated unit tests in each section:

| Section | Unit Tests |
|---------------|------------|
| Arrays | 1 |
| Binary Tree | 21 |
| Disjoint Set | 1 |
| Hashing | 2 |
| Heap | 0 |
| KD Tree | 3 |
| Linked List | 13 |
| Queues | 0 |
| Stacks | 1 |
| Suffix Tree | 5 |
| Trie | 2 |
| **Total** | **49** |

#### Test Coverage

The repository does not include dedicated test coverage scripts, so an accurate coverage percentage could not be determined.
diff --git a/courseProjectDocs/requirements-and-oracles.md b/courseProjectDocs/requirements-and-oracles.md
new file mode 100644
index 000000000000..f0dd39215b59
--- /dev/null
+++ b/courseProjectDocs/requirements-and-oracles.md
@@ -0,0 +1,38 @@
# Requirements and Test Oracles

## Functional Requirements

The described data structures should:

1. **Traversal:** Provide traversal methods (e.g., in-order, pre-order, and post-order for trees; forward/backward for linked lists).
2. **Sorting:** Provide built-in or integrable sorting mechanisms where applicable.
3. **Search & Access:** Support efficient search and retrieval of elements.
4. **Error Handling:** Gracefully handle invalid operations (e.g., removing from an empty queue).
5. **Insertion & Deletion:** Allow insertion and deletion at appropriate positions.
6. **Integration with Algorithms:** Support direct use with sorting, searching, and graph algorithms.

## Non-Functional Requirements

The data structures should ensure:

1. **Testability:** The design should allow easy integration with unit testing frameworks.
2. **Performance / Efficiency:** Operations should be optimized for time and space complexity (e.g., O(1) for stack push/pop).
3. **Scalability:** Data structures should handle large datasets without significant performance degradation.
4. **Reliability:** Functions should consistently return correct results under normal usage.
5. **Maintainability:** Code should be modular, well-documented, and easy to update or extend.
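To make these requirements concrete before mapping them to oracles, the sketch below shows how FR-4 (error handling) and FR-5 (insertion & deletion) could be exercised with a standard unit testing framework, in line with NFR-1. The `ArrayQueue` class is a hypothetical stand-in for the queue implementations under `data_structures/queues/`, not an API from the repository:

```python
import unittest


class ArrayQueue:
    """Minimal stand-in queue used only to illustrate the oracles below."""

    def __init__(self):
        self._items = []

    def enqueue(self, item):
        self._items.append(item)

    def dequeue(self):
        if not self._items:
            raise IndexError("dequeue from empty queue")
        return self._items.pop(0)


class TestQueueOracles(unittest.TestCase):
    def test_fifo_order_after_insertions(self):
        # FR-5 oracle: after insertions/deletions, the structure reflects the updated state
        queue = ArrayQueue()
        queue.enqueue(1)
        queue.enqueue(2)
        self.assertEqual(queue.dequeue(), 1)
        self.assertEqual(queue.dequeue(), 2)

    def test_dequeue_from_empty_queue_raises(self):
        # FR-4 oracle: invalid operations raise an appropriate exception instead of crashing
        queue = ArrayQueue()
        with self.assertRaises(IndexError):
            queue.dequeue()


if __name__ == "__main__":
    unittest.main()
```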
## Test Oracles

| Requirement ID | Requirement Description | Test Oracle (Expected Behavior) |
| -------------- | ----------------------- | ------------------------------- |
| FR-1 | Provide traversal methods (e.g., in-order, pre-order, and post-order for trees; forward/backward for linked lists) | Traversal methods return elements in the correct sequence for the requested traversal type. |
| FR-2 | Provide built-in or integrable sorting mechanisms where applicable | Sorting operations produce a correctly ordered sequence of elements. |
| FR-3 | Support efficient search and retrieval of elements | For a known dataset, search operations return the correct element or index within the expected time bound (e.g., O(log n)). |
| FR-4 | Gracefully handle invalid operations (e.g., removing from an empty queue) | Invalid operations raise appropriate exceptions or return error codes without crashing. |
| FR-5 | Allow insertion and deletion at appropriate positions | After an insertion or deletion, the data structure accurately reflects the updated state. |
| FR-6 | Support direct use with sorting, searching, and graph algorithms | The data structure produces correct results when used with algorithms such as sorting and searching. |
| NFR-1 | The design should allow easy integration with unit testing frameworks | All public methods are testable via standard unit testing frameworks. |
| NFR-2 | Operations should be optimized for time and space complexity (e.g., O(1) for stack push/pop) | Operations meet the expected time/space complexity bounds under typical workloads. |
| NFR-3 | Data structures should handle large datasets without significant performance degradation | Runs on large datasets show stable performance with no memory overflows or timeouts. |
| NFR-4 | Functions should consistently return correct results under normal usage | Repeated calls with valid inputs yield consistent and correct outputs. |
| NFR-5 | Code should be modular, well-documented, and easy to update or extend | Code reviews and static analysis confirm modularity, ease of updating, and meaningful documentation. |
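As an illustration of how an oracle such as FR-1's becomes an executable check, the sketch below verifies that an in-order traversal of a binary search tree yields its keys in sorted order. The `Node` class and `in_order` generator are hypothetical stand-ins for the implementations under `data_structures/binary_tree/`:

```python
import unittest


class Node:
    """Minimal binary tree node used only for this illustration."""

    def __init__(self, value, left=None, right=None):
        self.value = value
        self.left = left
        self.right = right


def in_order(node):
    """Yield values in left-root-right order."""
    if node is not None:
        yield from in_order(node.left)
        yield node.value
        yield from in_order(node.right)


class TestTraversalOracle(unittest.TestCase):
    def test_in_order_traversal_of_bst_is_sorted(self):
        # FR-1 oracle: in-order traversal of a binary search tree
        # must return keys in ascending order
        root = Node(8, Node(3, Node(1), Node(6)), Node(10, None, Node(14)))
        values = list(in_order(root))
        self.assertEqual(values, sorted(values))


if __name__ == "__main__":
    unittest.main()
```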