Skip to content

Commit 0493013

Browse files
authored
Merge pull request #3 from nshkrdotcom/fix-issue-1-trailing-wrapper-text
Fix issue #1: Remove trailing wrapper text after JSON blocks
2 parents 71acaef + c6827c4 commit 0493013

File tree

4 files changed

+103
-2
lines changed

4 files changed

+103
-2
lines changed

CHANGELOG.md

100644100755
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.1.3] - 2025-07-05
11+
12+
### Fixed
13+
- Fixed issue where wrapper text following JSON blocks was not recognized (#1)
14+
- Added dedicated `remove_trailing_wrapper_text/1` function in Layer 1
15+
- Now properly removes trailing text after valid JSON structures
16+
- Example: `[{"id": 1}]\n1 Volume(s) created` → `[{"id": 1}]`
17+
1018
## [0.1.2] - 2025-06-08
1119

1220
### Added

lib/json_remedy/layer1/content_cleaning.ex

100644100755
Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,10 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
8787
# Then try to extract from prose/text
8888
{result, prose_repairs} = extract_from_prose(result)
8989

90-
all_repairs = existing_repairs ++ html_repairs ++ prose_repairs
90+
# Finally, remove any trailing wrapper text after JSON
91+
{result, trailing_repairs} = remove_trailing_wrapper_text(result)
92+
93+
all_repairs = existing_repairs ++ html_repairs ++ prose_repairs ++ trailing_repairs
9194
{result, all_repairs}
9295
end
9396

@@ -682,6 +685,66 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
682685
find_balanced_end(rest, open, close, pos + 1, balance, in_string)
683686
end
684687

688+
# Removes trailing wrapper text that follows a leading JSON object or array.
#
# Dispatches on the first non-whitespace character of `input`:
#   * `{` -> check_and_remove_trailing_text/3 with the `{`/`}` pair
#   * `[` -> check_and_remove_trailing_text/3 with the `[`/`]` pair
#   * anything else -> `input` is returned unchanged with no repairs
#
# Returns `{text, repairs}`, where `repairs` is a (possibly empty) list.
689+
defp remove_trailing_wrapper_text(input) do
690+
# The trimmed copy is used only for the prefix test below; the untrimmed
# `input` is what gets passed on and returned.
trimmed = String.trim(input)
691+
692+
# Check if input starts with JSON structure
693+
cond do
694+
String.starts_with?(trimmed, "{") ->
695+
check_and_remove_trailing_text(input, "{", "}")
696+
697+
String.starts_with?(trimmed, "[") ->
698+
check_and_remove_trailing_text(input, "[", "]")
699+
700+
true ->
701+
# Not a leading JSON structure: nothing to strip, no repair recorded.
{input, []}
702+
end
703+
end
704+
705+
# Cuts `input` off right after the JSON structure delimited by
# `open_char`/`close_char` when non-whitespace text follows it.
#
# Returns `{input, []}` when find_balanced_end/3 yields nil or when only
# whitespace follows the structure. Otherwise returns `{json_content, [repair]}`
# where `repair` is a map describing the removal.
defp check_and_remove_trailing_text(input, open_char, close_char) do
706+
# Find where the JSON structure starts
# (length of the text before the first `open_char`; 0 if the split does not
# produce two parts).
707+
json_start =
708+
case String.split(input, open_char, parts: 2) do
709+
[prefix, _] -> String.length(prefix)
710+
_ -> 0
711+
end
712+
713+
# Extract from the JSON start to find the balanced end
714+
substring_from_json = String.slice(input, json_start, String.length(input))
715+
716+
# NOTE(review): find_balanced_end/3 is defined outside this hunk. The code
# below presumably expects it to return the zero-based offset of the matching
# `close_char` within `substring_from_json`, in the same units String.slice/3
# uses - confirm against its definition.
case find_balanced_end(substring_from_json, open_char, close_char) do
717+
nil ->
718+
# Could not find balanced end, return as is
719+
{input, []}
720+
721+
end_pos ->
722+
# Calculate the absolute position where JSON ends
# (the `+ 1` moves past the closing delimiter, so `json_end` is used as an
# exclusive end offset into `input`).
723+
json_end = json_start + end_pos + 1
724+
725+
# Check if there's non-whitespace content after JSON ends
726+
after_json = String.slice(input, json_end, String.length(input))
727+
728+
if String.trim(after_json) == "" do
729+
# No significant trailing content
730+
{input, []}
731+
else
732+
# Extract only the JSON portion
# (sliced from offset 0, so any text before `open_char` is kept).
733+
json_content = String.slice(input, 0, json_end)
734+
735+
# Repair record: `original` holds the full untouched input, `replacement`
# the truncated text, `position` the offset where the cut was made.
repair = %{
736+
layer: :content_cleaning,
737+
action: "removed trailing wrapper text",
738+
position: json_end,
739+
original: input,
740+
replacement: json_content
741+
}
742+
743+
{json_content, [repair]}
744+
end
745+
end
746+
end
747+
685748
# Helper functions for string detection using direct methods
686749

687750
# Fast check for long text that likely contains JSON content

mix.exs

100644100755
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
defmodule JsonRemedy.MixProject do
22
use Mix.Project
33

4-
@version "0.1.1"
4+
@version "0.1.3"
55
@source_url "https://github.com/nshkrdotcom/json_remedy"
66

77
def project do

test/unit/layer1_content_cleaning_test.exs

100644100755
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,36 @@ defmodule JsonRemedy.Layer1.ContentCleaningTest do
174174
assert length(context.repairs) > 0
175175
end
176176
end
177+
178+
# Regression test for GitHub issue #1: a JSON array followed by a line of
# plain text must come back from ContentCleaning.process/2 without that text.
test "extracts json with trailing wrapper text (GitHub issue #1)" do
179+
# Reproduces the reported input: a pretty-printed JSON array followed by the
# status line "1 Volume(s) created".
180+
input = """
181+
[
182+
{
183+
"volumeID": "f3a6ffd2-0111-4235-980c-a5ceec215e93",
184+
"name": "km-tst-20",
185+
"cloudID": "75b10103873d4a1ba0d52b43159a2842",
186+
"size": 1,
187+
"storageType": "ssd",
188+
"state": "creating",
189+
"shareable": false,
190+
"bootable": false,
191+
"volumePool": "General-Flash-002"
192+
}
193+
]
194+
1 Volume(s) created
195+
"""
196+
197+
# Start from a context with no prior repairs and no options.
{:ok, result, context} = ContentCleaning.process(input, %{repairs: [], options: []})
198+
199+
# Should extract only the JSON array, removing the trailing text
200+
trimmed_result = String.trim(result)
201+
assert String.starts_with?(trimmed_result, "[")
202+
assert String.ends_with?(trimmed_result, "]")
203+
assert not String.contains?(result, "1 Volume(s) created")
204+
assert length(context.repairs) > 0
205+
# NOTE(review): hd/1 inspects only the first repair; this presumably relies on
# no earlier cleaning step recording a repair for this input - confirm.
assert hd(context.repairs).action =~ "removed trailing wrapper text"
206+
end
177207
end
178208

179209
describe "encoding normalization" do

0 commit comments

Comments
 (0)