Skip to content

Commit b083a0b

Browse files
NSHkrNSHkr
authored and committed
l1 complete
1 parent 9114090 commit b083a0b

File tree

5 files changed

+480
-30
lines changed

5 files changed

+480
-30
lines changed

CLAUDE.md

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,15 @@ JsonRemedy - A practical, multi-layered JSON repair library for Elixir that inte
2323
- ✅ Wrapper text extraction (HTML, prose)
2424
- ✅ Encoding normalization
2525

26-
**Status**: **TDD COMPLETE** - All 14 tests passing, code quality checks pass
26+
**Implementation Status**: **TDD COMPLETE**
27+
- ✅ Core functionality implemented (21/21 unit tests passing)
28+
- ✅ LayerBehaviour contract fully implemented
29+
- ✅ All required callbacks: `process/2`, `supports?/1`, `priority/0`, `name/0`, `validate_options/1`
30+
- ✅ Public API functions: `strip_comments/1`, `extract_json_content/1`, `normalize_encoding/1`
31+
- ✅ Context-aware processing that preserves string content
32+
- ✅ Performance tests passing (4/4 tests, all functions under performance thresholds)
33+
- ✅ Code quality checks passing (Credo, mix format)
34+
- ✅ Type specifications and documentation complete
2735

2836
### Phase 3: Layer 2 - Structural Repair 📋 PLANNED
2937
**Goal**: Fix missing/extra delimiters using state machine for context tracking
@@ -135,12 +143,17 @@ Example: `{message: "Don't change: True, None", active: True}`
135143
5. **Edge cases** (50% success rate): severely malformed (graceful failure OK)
136144

137145
## Next Steps
138-
1. **COMPLETED**: Layer 1 Content Cleaning with TDD (14/14 tests passing)
146+
1. **COMPLETED**: Layer 1 Content Cleaning with TDD
147+
- Core functionality (21/21 unit tests passing)
148+
- LayerBehaviour contract implementation
149+
- Public API functions matching contracts
150+
- Performance optimization (4/4 performance tests passing)
151+
- Code quality and documentation
139152
2. 🟡 **NEXT**: Begin Layer 2 Structural Repair with state machine approach
140153
3. Create test fixtures for comprehensive scenarios
141154
4. Build context-aware syntax normalization for Layer 3
142-
5. Add performance benchmarking
143-
6. Create comprehensive integration tests
155+
5. Add integration tests across layers
156+
6. Create comprehensive property-based tests
144157

145158
## Important Reminders
146159
- **Test-first approach**: Write failing tests before implementation

lib/json_remedy/layer1/content_cleaning.ex

Lines changed: 127 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,14 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
1111
Uses regex-based processing as it's the right tool for these content cleaning tasks.
1212
"""
1313

14-
@type repair_action :: %{
15-
layer: atom(),
16-
action: String.t(),
17-
position: non_neg_integer() | nil,
18-
original: String.t() | nil,
19-
replacement: String.t() | nil
20-
}
21-
22-
@type repair_context :: %{
23-
repairs: [repair_action()],
24-
options: keyword(),
25-
metadata: map()
26-
}
27-
28-
@type layer_result ::
29-
{:ok, String.t(), repair_context()}
30-
| {:continue, String.t(), repair_context()}
31-
| {:error, String.t()}
14+
@behaviour JsonRemedy.LayerBehaviour
15+
16+
alias JsonRemedy.LayerBehaviour
17+
18+
# Local aliases for the shared types defined in LayerBehaviour
19+
@type repair_action :: LayerBehaviour.repair_action()
20+
@type repair_context :: LayerBehaviour.repair_context()
21+
@type layer_result :: LayerBehaviour.layer_result()
3222

3323
@doc """
3424
Process input string and apply Layer 1 content cleaning repairs.
@@ -44,8 +34,8 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
4434
input
4535
|> remove_code_fences()
4636
|> remove_comments()
47-
|> extract_json_content()
48-
|> normalize_encoding()
37+
|> extract_json_content_internal()
38+
|> normalize_encoding_internal()
4939

5040
updated_context = %{
5141
repairs: context.repairs ++ new_repairs,
@@ -118,9 +108,9 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
118108
@doc """
119109
Extract JSON from wrapper text (HTML, prose, etc.).
120110
"""
121-
@spec extract_json_content(input :: {String.t(), [repair_action()]}) ::
111+
@spec extract_json_content_internal(input :: {String.t(), [repair_action()]}) ::
122112
{String.t(), [repair_action()]}
123-
def extract_json_content({input, existing_repairs}) do
113+
def extract_json_content_internal({input, existing_repairs}) do
124114
# Try to extract JSON from HTML tags first
125115
{result, html_repairs} = extract_from_html_tags(input)
126116

@@ -134,9 +124,9 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
134124
@doc """
135125
Normalize text encoding to UTF-8.
136126
"""
137-
@spec normalize_encoding(input :: {String.t(), [repair_action()]}) ::
127+
@spec normalize_encoding_internal(input :: {String.t(), [repair_action()]}) ::
138128
{String.t(), [repair_action()]}
139-
def normalize_encoding({input, existing_repairs}) do
129+
def normalize_encoding_internal({input, existing_repairs}) do
140130
if String.valid?(input) do
141131
{input, existing_repairs}
142132
else
@@ -156,6 +146,113 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
156146
end
157147
end
158148

149+
# LayerBehaviour callback implementations
150+
151+
@doc """
Reports whether Layer 1 has anything to clean in `input`.

Returns `true` when the binary contains a code fence marker, a `//` or `/*`
comment opener, a `<pre>` or `<code>` tag, or when
`long_text_with_content?/1` accepts it. Any non-binary argument yields
`false`.
"""
@spec supports?(input :: String.t()) :: boolean()
def supports?(input) when is_binary(input) do
  # Cheap substring probes only; nothing is parsed at this stage.
  cleanup_markers = ["```", "//", "/*", "<pre>", "<code>"]

  String.contains?(input, cleanup_markers) or long_text_with_content?(input)
end

def supports?(_other), do: false
169+
170+
@doc """
Position of this layer in the repair pipeline.

Always `1`: content cleaning is meant to run before the other layers.
"""
@spec priority() :: non_neg_integer()
def priority do
  1
end
176+
177+
@doc """
Human-readable label identifying this layer.
"""
@spec name() :: String.t()
def name do
  "Content Cleaning"
end
182+
183+
# Every option understood by Layer 1 is an on/off switch, so the same list
# serves both as the set of accepted keys and as the set of boolean options.
@layer1_boolean_options [
  :remove_comments,
  :remove_code_fences,
  :extract_from_html,
  :normalize_encoding
]

@doc """
Validate layer configuration and options.

Layer 1 accepts the boolean switches `:remove_comments`,
`:remove_code_fences`, `:extract_from_html` and `:normalize_encoding`.

Returns `:ok` when `options` is a keyword list that contains only those keys,
each with a boolean value. A key may appear more than once. Returns
`{:error, message}` when:

  * `options` is not a keyword list (including a plain list such as `[1, 2]`),
  * an unknown key is present,
  * a known key carries a non-boolean value.
"""
@spec validate_options(options :: keyword()) :: :ok | {:error, String.t()}
def validate_options(options) when is_list(options) do
  if Keyword.keyword?(options) do
    # Filter rather than use `--`: list subtraction removes only one occurrence
    # per key, which would flag a repeated valid key as invalid.
    case Enum.reject(Keyword.keys(options), &(&1 in @layer1_boolean_options)) do
      [] ->
        validate_option_values(options)

      invalid_keys ->
        {:error,
         "Invalid options: #{inspect(invalid_keys)}. " <>
           "Valid options: #{inspect(@layer1_boolean_options)}"}
    end
  else
    {:error, "Options must be a keyword list"}
  end
end

def validate_options(_), do: {:error, "Options must be a keyword list"}

# Returns :ok, or an error naming the first boolean option given a non-boolean value.
defp validate_option_values(options) do
  offending =
    Enum.find(options, fn {key, value} ->
      key in @layer1_boolean_options and not is_boolean(value)
    end)

  case offending do
    nil -> :ok
    {key, value} -> {:error, "Option #{key} must be a boolean, got: #{inspect(value)}"}
  end
end
222+
223+
# Public API functions that match the API contracts
224+
225+
@doc """
Strip comments from a raw string.

Convenience entry point for callers that hold a plain binary: the input is
paired with an empty repair list and handed to `remove_comments/1`, whose
`{cleaned, repairs}` result is returned unchanged.
"""
@spec strip_comments(input :: String.t()) :: {String.t(), [repair_action()]}
def strip_comments(input) when is_binary(input), do: remove_comments({input, []})
233+
234+
@doc """
Extract JSON from wrapper text (HTML, prose, etc.) given a raw string.

Convenience entry point for callers that hold a plain binary: the input is
paired with an empty repair list and handed to
`extract_json_content_internal/1`, the pipeline variant that works on
`{input, repairs}` tuples.
"""
@spec extract_json_content(input :: String.t()) :: {String.t(), [repair_action()]}
def extract_json_content(input) when is_binary(input),
  do: extract_json_content_internal({input, []})
244+
245+
@doc """
Normalize the encoding of a raw string to UTF-8.

Convenience entry point for callers that hold a plain binary: the input is
paired with an empty repair list and handed to
`normalize_encoding_internal/1`, the pipeline variant that works on
`{input, repairs}` tuples.
"""
@spec normalize_encoding(input :: String.t()) :: {String.t(), [repair_action()]}
def normalize_encoding(input) when is_binary(input),
  do: normalize_encoding_internal({input, []})
255+
159256
# Private helper functions
160257

161258
defp remove_line_comments(input) do
@@ -312,6 +409,12 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
312409

313410
# Helper functions for string detection
314411

412+
# Wrapper-text heuristic: true for input larger than 100 bytes whose first
# character is neither `{` nor `[`.
defp long_text_with_content?(input) do
  byte_size(input) > 100 and not String.starts_with?(input, ["{", "["])
end
417+
315418
defp inside_string?(input, target) when is_binary(target) do
316419
# Find the position of target in input
317420
case String.split(input, target, parts: 2) do

lib/json_remedy/layer_behaviour.ex

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
defmodule JsonRemedy.LayerBehaviour do
  @moduledoc """
  Defines the contract that all repair layers must implement.

  Each layer is responsible for one specific type of repair concern
  and should be composable with other layers in the pipeline.

  Besides the callbacks, the module ships two helpers shared by layers:
  `inside_string?/2` and `apply_rule/2`.
  """

  @type repair_action :: %{
          layer: atom(),
          action: String.t(),
          position: non_neg_integer() | nil,
          original: String.t() | nil,
          replacement: String.t() | nil
        }

  @type repair_context :: %{
          repairs: [repair_action()],
          options: keyword(),
          metadata: map()
        }

  @type layer_result ::
          {:ok, String.t(), repair_context()}
          | {:continue, String.t(), repair_context()}
          | {:error, String.t()}

  @type syntax_rule :: %{
          name: String.t(),
          pattern: Regex.t(),
          replacement: String.t(),
          condition: (String.t() -> boolean()) | nil
        }

  @doc """
  Process input string and apply layer-specific repairs.

  Returns:
  - `{:ok, processed_input, updated_context}` - Layer completed successfully
  - `{:continue, input, context}` - Layer doesn't apply, pass to next layer
  - `{:error, reason}` - Layer failed, stop pipeline
  """
  @callback process(input :: String.t(), context :: repair_context()) :: layer_result()

  @doc """
  Check if this layer can handle the given input.
  Used for optimization and layer selection.
  """
  @callback supports?(input :: String.t()) :: boolean()

  @doc """
  Return the priority order for this layer (lower = earlier).
  Used to determine layer execution order.
  """
  @callback priority() :: non_neg_integer()

  @doc """
  Return a human-readable name for this layer.
  Used in logging and debugging.
  """
  @callback name() :: String.t()

  @doc """
  Validate layer configuration and options.
  Called during pipeline setup.
  """
  @callback validate_options(options :: keyword()) :: :ok | {:error, String.t()}

  @optional_callbacks validate_options: 1

  @doc """
  Check if a position in the input is inside a string literal.
  Used to avoid applying repairs to string content.

  `position` is a grapheme offset: the text before it is taken with
  `String.slice/3` and scanned for double quotes. A backslash escapes the
  character that follows it, so an escaped quote does not toggle the string
  state, while a quote preceded by an escaped backslash does.

  Returns `true` when an odd number of unescaped quotes precede `position`.
  """
  @spec inside_string?(input :: String.t(), position :: non_neg_integer()) :: boolean()
  def inside_string?(input, position)
      when is_binary(input) and is_integer(position) and position >= 0 do
    input
    |> String.slice(0, position)
    |> open_string_after?(false)
  end

  # Walks the prefix byte by byte, tracking whether a string literal is open.
  # Byte-wise scanning is safe for UTF-8: the quote and backslash bytes never
  # occur inside a multi-byte sequence.
  defp open_string_after?(<<>>, open?), do: open?

  # Backslash consumes the next byte, so `\"` is skipped while `\\"` leaves the
  # quote visible to the clause below.
  defp open_string_after?(<<?\\, _escaped, rest::binary>>, open?),
    do: open_string_after?(rest, open?)

  defp open_string_after?(<<?", rest::binary>>, open?),
    do: open_string_after?(rest, not open?)

  defp open_string_after?(<<_byte, rest::binary>>, open?),
    do: open_string_after?(rest, open?)

  @doc """
  Apply a single syntax rule with context awareness.

  When the rule has a `condition` and it rejects `input`, the input is
  returned untouched. Otherwise `rule.pattern` is replaced with
  `rule.replacement` across the input.

  Returns `{result, repairs}`. `repairs` holds one `:generic` repair action
  recording the full input and output when the replacement changed the text,
  and is empty otherwise.
  """
  @spec apply_rule(input :: String.t(), rule :: syntax_rule()) ::
          {String.t(), [repair_action()]}
  def apply_rule(input, rule) do
    if rule.condition && !rule.condition.(input) do
      {input, []}
    else
      case Regex.replace(rule.pattern, input, rule.replacement) do
        # Pattern did not match, or the replacement was a no-op.
        ^input ->
          {input, []}

        result ->
          repair = %{
            layer: :generic,
            action: "applied rule: #{rule.name}",
            position: nil,
            original: input,
            replacement: result
          }

          {result, [repair]}
      end
    end
  end
end

0 commit comments

Comments
 (0)