Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
## [Unreleased]

### Added

- Added category methods to `Result` for querying specific types of sensitive information (e.g., `emails`, `emails?`, `email_mapping`)
- Category methods are automatically generated for all default filter types and custom labels
- Category methods always return empty arrays/hashes when no data of that type is found, ensuring they're safe to call without checking

### Changed

- **BREAKING:** Added strict label validation for custom filters. Labels must now start and end with letters and contain only alphabetic characters and single underscores (no consecutive underscores, digits, or special characters). Malformed labels that were previously accepted will now raise `Error::MalformedLabel`.
Expand Down
70 changes: 70 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,76 @@ result.safe?
# => false
```

### Category Methods

Query the result for specific types of sensitive information using category methods:

```ruby
result = TopSecret::Text.filter("Ralph can be reached at ralph@example.com or 555-1234")

# Check if emails were found
result.emails?
# => true

# Get all emails
result.emails
# => ["ralph@example.com"]

# Get email mapping
result.email_mapping
# => {:EMAIL_1=>"ralph@example.com"}

# Similarly for other types
result.people? # => true
result.people # => ["Ralph"]
result.person_mapping # => {:PERSON_1=>"Ralph"}

result.phone_numbers? # => true
result.phone_numbers # => ["555-1234"]
result.phone_number_mapping # => {:PHONE_NUMBER_1=>"555-1234"}
```

Available category methods for all default filters:

- `emails`, `emails?`, `email_mapping`
- `credit_cards`, `credit_cards?`, `credit_card_mapping`
- `phone_numbers`, `phone_numbers?`, `phone_number_mapping`
- `ssns`, `ssns?`, `ssn_mapping`
- `people`, `people?`, `person_mapping`
- `locations`, `locations?`, `location_mapping`

These methods are always available and return empty arrays/hashes when no sensitive information of that type is found:

```ruby
result = TopSecret::Text.filter("No sensitive data here")

result.emails? # => false
result.emails # => []
result.email_mapping # => {}
```

When using custom labels, methods are generated based on the label name. Note that default filter methods remain available and can access the same data:

```ruby
result = TopSecret::Text.filter(
"user[at]example.com",
email_filter: TopSecret::Filters::Regex.new(
label: "EMAIL_ADDRESS",
regex: /\w+\[at\]\w+\.\w+/
)
)

# Custom label methods (based on EMAIL_ADDRESS)
result.email_addresses # => ["user[at]example.com"]
result.email_addresses? # => true
result.email_address_mapping # => {:EMAIL_ADDRESS_1=>"user[at]example.com"}

# Default methods still work and return the same data
result.emails # => ["user[at]example.com"]
result.emails? # => true
result.email_mapping # => {:EMAIL_ADDRESS_1=>"user[at]example.com"}
```

### Scanning for Sensitive Information

Use `TopSecret::Text.scan` to detect sensitive information without redacting the text. This is useful when you only need to check if sensitive data exists or get a mapping of what was found:
Expand Down
3 changes: 3 additions & 0 deletions lib/top_secret/constants.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@ module TopSecret

# @return [Float] The minimum confidence score for NER filtering
MIN_CONFIDENCE_SCORE = 0.5

# @return [String] The delimiter used in label names
LABEL_DELIMITER = "_"
end
175 changes: 175 additions & 0 deletions lib/top_secret/mapping.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,40 @@
# frozen_string_literal: true

require "active_support/core_ext/string/inflections"

module TopSecret
# Provides dynamic category methods for querying sensitive information by type.
#
# This module automatically generates methods for accessing sensitive information
# organized by category (emails, credit cards, people, etc.). Methods are available
# for all default filter types and any custom labels used in the mapping.
#
# @example Querying emails
# result = TopSecret::Text.filter("Contact ralph@example.com")
# result.emails? # => true
# result.emails # => ["ralph@example.com"]
# result.email_mapping # => {:EMAIL_1=>"ralph@example.com"}
#
# @example With no matches
# result = TopSecret::Text.filter("No sensitive data")
# result.emails? # => false
# result.emails # => []
# result.email_mapping # => {}
#
# @example Custom labels
# result = TopSecret::Text.filter(
# "user[at]example.com",
# email_filter: TopSecret::Filters::Regex.new(
# label: "EMAIL_ADDRESS",
# regex: /\w+\[at\]\w+\.\w+/
# )
# )
# result.email_addresses # => ["user[at]example.com"]
# result.email_address_mapping # => {:EMAIL_ADDRESS_1=>"user[at]example.com"}
module Mapping
MAPPING_SUFFIX = "_mapping"
PREDICATE_SUFFIX = "?"

# @return [Boolean] Whether sensitive information was found
def sensitive?
mapping.any?
Expand All @@ -11,5 +44,147 @@ def sensitive?
def safe?
  # Safe is the exact negation of #sensitive?.
  return false if sensitive?

  true
end

# Resolves a category method (mapping, plural, predicate or mapping predicate)
# the first time it is called.
#
# The four method-name lists are consulted in a fixed order. On the first hit
# a real method is defined on the receiver's class that delegates to the
# matching private builder, and that method is then invoked. Arguments and the
# block given to the original call are not forwarded to the generated method.
# Names found in none of the lists fall through to +super+.
#
# @param method_name [Symbol] The name of the method that was called
# @return [Object] Whatever the generated method returns
def method_missing(method_name, *args, &block)
  builder_by_list = {
    mapping_methods: :build_mapping_method_from,
    pluralized_methods: :build_plural_method_from,
    predicate_methods: :build_predicate_method_from,
    mapping_predicate_methods: :build_mapping_predicate_method_from
  }
  # Hash#find walks the pairs in insertion order and stops at the first match,
  # so later lists are only computed when earlier ones do not contain the name.
  _list, builder = builder_by_list.find { |list, _| send(list).include?(method_name) }

  return super unless builder

  self.class.define_method(method_name) { send(builder, method_name) }
  send(method_name)
end

# Reports the dynamically generated category methods to +respond_to?+.
#
# @param method_name [Symbol] The method name being queried
# @param include_private [Boolean] Passed through to +super+
# @return [Boolean] true when the name appears in any of the dynamic method
#   lists; otherwise whatever +super+ returns
def respond_to_missing?(method_name, include_private = false)
  dynamic_lists = %i[mapping_methods pluralized_methods predicate_methods mapping_predicate_methods]
  return true if dynamic_lists.any? { |list| send(list).include?(method_name) }

  super
end

# Lists every type for which category methods are available.
#
# The names come from #all_types (types parsed from the mapping keys followed
# by the default filter types); duplicates are dropped and each name is
# converted to a Symbol. For example, with mapping
# `{EMAIL_1: "ralph@example.com"}`, the type is `:email`. The result is
# memoized after the first call.
#
# @return [Array<Symbol>] List of available types
# @example
# result = TopSecret::Text.filter("ralph@example.com")
# result.types
# # => [:email, :credit_card, :phone_number, :ssn, :person, :location]
def types
  return @types if @types

  distinct_names = all_types.uniq
  @types = distinct_names.map(&:to_sym)
end

private

# Derives a lowercase type name from every key in the mapping.
#
# Each key is split on the label delimiter, empty segments are discarded, the
# final segment is dropped, and the remaining segments are re-joined and
# downcased (e.g. `:CREDIT_CARD_1` becomes "credit_card"). A key with a single
# segment therefore yields an empty string. Duplicates are not removed here.
#
# @return [Array<String>] One type name per mapping key
def types_from_mapping
  delimiter = TopSecret::LABEL_DELIMITER

  mapping.keys.map do |label|
    *type_segments, _last_segment = label.to_s.split(delimiter).reject(&:empty?)
    type_segments.join(delimiter).downcase
  end
end

# Collects the lowercase label of every default filter object.
#
# @return [Array<String>] One downcased label per default filter
def types_from_filters
  labels = default_filter_objects.map(&:label)
  labels.map(&:downcase)
end

# Concatenates the types found in the mapping with the default filter types,
# mapping types first. Duplicates are kept.
#
# @return [Array<String>] Combined list of type names
def all_types
  from_mapping = types_from_mapping
  from_filters = types_from_filters

  from_mapping + from_filters
end

# Reads the six default filters from the TopSecret module-level accessors,
# in a fixed order, leaving out any that are nil.
#
# @return [Array<Object>] The non-nil default filter objects
def default_filter_objects
  accessors = %i[
    credit_card_filter
    email_filter
    phone_number_filter
    ssn_filter
    people_filter
    location_filter
  ]

  accessors.map { |accessor| TopSecret.public_send(accessor) }.compact
end

# The available types as Strings rather than Symbols.
#
# @return [Array<String>] String form of every entry in #types
def stringified_types
  types.map { |type_symbol| type_symbol.to_s }
end

# Names of the plural category methods (e.g. `:emails`), one per type.
# Memoized after the first call.
#
# @return [Array<Symbol>] Pluralized type names
def pluralized_methods
  @pluralized_methods ||= stringified_types.map { |type| type.pluralize.to_sym }
end

# Names of the plural predicate methods (e.g. `:emails?`): every plural
# method name with the predicate suffix appended. Memoized after the first call.
#
# @return [Array<Symbol>] Predicate method names
def predicate_methods
  @predicate_methods ||= pluralized_methods.map do |plural_name|
    "#{plural_name}#{PREDICATE_SUFFIX}".to_sym
  end
end

# Names of the mapping predicate methods (e.g. `:email_mapping?`): every
# mapping method name with the predicate suffix appended. Memoized after the
# first call.
#
# @return [Array<Symbol>] Mapping predicate method names
def mapping_predicate_methods
  @mapping_predicate_methods ||= mapping_methods.map do |mapping_name|
    "#{mapping_name}#{PREDICATE_SUFFIX}".to_sym
  end
end

# Names of the mapping methods (e.g. `:email_mapping`), one per type.
#
# A type that itself ends with the mapping suffix is pluralized before the
# suffix is appended (`network_mapping` becomes `:network_mappings_mapping`).
# Memoized after the first call.
#
# @return [Array<Symbol>] Mapping method names
def mapping_methods
  @mapping_methods ||= stringified_types.map do |type|
    stem = type.end_with?(MAPPING_SUFFIX) ? type.pluralize : type
    :"#{stem}#{MAPPING_SUFFIX}"
  end
end

# Builds the result of a `<type>_mapping` method: the subset of the mapping
# whose keys belong to the type encoded in the method name.
#
# The type is recovered by stripping the mapping suffix from the method name.
# If what remains is already plural and its singular form ends with the
# mapping suffix (e.g. `network_mappings_mapping`), the singular form is used,
# reversing the pluralization applied when the method name was generated.
#
# A key belongs to the type when it equals the upcased type or starts with the
# upcased type followed by the label delimiter. Matching on the delimiter
# boundary keeps `email_mapping` returning `EMAIL_1` and `EMAIL_ADDRESS_1`
# while no longer picking up unrelated labels that merely share leading
# characters (such as `EMAILER_1`).
#
# @param method_name [Symbol, String] The mapping method name, e.g. `:email_mapping`
# @return [Hash] The matching key/value pairs; empty when nothing matches
def build_mapping_method_from(method_name)
  type_name = method_name.to_s.delete_suffix(MAPPING_SUFFIX)

  type_name = type_name.singularize if type_name.pluralize == type_name && type_name.singularize.end_with?(MAPPING_SUFFIX)

  type = type_name.upcase
  type_prefix = "#{type}#{TopSecret::LABEL_DELIMITER}"

  mapping.select do |key, _|
    label = key.to_s
    label == type || label.start_with?(type_prefix)
  end
end

# Builds the result of a plural category method (e.g. `emails`): the values
# of the corresponding mapping method.
#
# The mapping method is normally the singular form plus the mapping suffix.
# When the singular form itself ends with the mapping suffix, the plural
# method name is used as the stem instead (`network_mappings` resolves to
# `network_mappings_mapping`).
#
# @param method_name [Symbol, String] The plural method name
# @return [Array] The values returned by the matching mapping method
def build_plural_method_from(method_name)
  singular = method_name.to_s.singularize
  stem = singular.end_with?(MAPPING_SUFFIX) ? method_name : singular

  send(:"#{stem}#{MAPPING_SUFFIX}").values
end

# Builds the result of a plural predicate method (e.g. `emails?`) by removing
# the predicate suffix and asking the plural method whether it returned
# anything.
#
# @param method_name [Symbol, String] The predicate method name
# @return [Boolean] Result of `any?` on the plural method's return value
def build_predicate_method_from(method_name)
  plural_name = method_name.to_s.delete_suffix(PREDICATE_SUFFIX)

  send(plural_name).any?
end

# Builds the result of a mapping predicate method (e.g. `email_mapping?`) by
# removing the predicate suffix and asking the mapping method whether it
# returned anything.
#
# @param method_name [Symbol, String] The mapping predicate method name
# @return [Boolean] Result of `any?` on the mapping method's return value
def build_mapping_predicate_method_from(method_name)
  mapping_name = method_name.to_s.delete_suffix(PREDICATE_SUFFIX)

  send(mapping_name).any?
end
end
end
4 changes: 2 additions & 2 deletions lib/top_secret/text/global_mapping.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ def process_result(result)
# @param individual_key [Symbol] The individual key from a filter result
# @return [Symbol] The global key with consistent numbering
def generate_global_key(individual_key)
label_type = individual_key.to_s.rpartition("_").first
label_type = individual_key.to_s.rpartition(TopSecret::LABEL_DELIMITER).first

label_counters[label_type] ||= 0
label_counters[label_type] += 1
:"#{label_type}_#{label_counters[label_type]}"
:"#{label_type}#{TopSecret::LABEL_DELIMITER}#{label_counters[label_type]}"
end
end
end
Expand Down
77 changes: 77 additions & 0 deletions spec/top_secret/result_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,81 @@
end
end
end

describe "categorization" do
let(:mapping) do
{
EMAIL_1: "[email protected]",
EMAIL_2: "[email protected]",
PERSON_1: "Ralph",
IP_ADDRESS_1: "192.168.1.1",
CREDIT_CARD_NUMBER_1: "4242424242424242",
NETWORK_MAPPING_1: "10.0.1.0/24 -> 192.168.1.0/24"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should test something like {WORD}_MAPPING_{WORD}_{N} to ensure that works too.

}
end

it "categorizes by labels" do
expect(subject.emails?).to be true
expect(subject.people?).to be true
expect(subject.credit_card_numbers?).to be true
expect(subject.network_mappings?).to be true

expect(subject.emails).to eq([
"[email protected]",
"[email protected]"
])
expect(subject.people).to eq([
"Ralph"
])
expect(subject.credit_card_numbers).to eq([
"4242424242424242"
])
expect(subject.network_mappings).to eq([
"10.0.1.0/24 -> 192.168.1.0/24"
])

expect(subject.email_mapping).to eq({
EMAIL_1: "[email protected]",
EMAIL_2: "[email protected]"
})
expect(subject.person_mapping).to eq({
PERSON_1: "Ralph"
})
expect(subject.credit_card_number_mapping).to eq({
CREDIT_CARD_NUMBER_1: "4242424242424242"
})
expect(subject.network_mappings_mapping).to eq({
NETWORK_MAPPING_1: "10.0.1.0/24 -> 192.168.1.0/24"
})
end

it "extracts types" do
expect(subject.types).to include(
:email,
:person,
:ip_address,
:credit_card_number,
:network_mapping,
:credit_card,
:phone_number,
:ssn,
:location
)
end

it "responds to dynamic methods" do
expect(subject).to respond_to(:emails)
expect(subject).to respond_to(:emails?)
expect(subject).to respond_to(:email_mapping)
expect(subject).to respond_to(:people)
expect(subject).to respond_to(:people?)
expect(subject).to respond_to(:person_mapping)
expect(subject).to respond_to(:credit_card_numbers)
expect(subject).to respond_to(:credit_card_numbers?)
expect(subject).to respond_to(:credit_card_number_mapping)
expect(subject).to respond_to(:network_mappings)
expect(subject).to respond_to(:network_mappings_mapping?)
expect(subject).to respond_to(:network_mappings_mapping)
end
end
end
Loading
Loading