Skip to content

Commit d4d355e

Browse files
aristidispmeta-codesync[bot]
authored andcommitted
Skip commented namespace directives in file_manager
Summary: `get_first_namespace_offset()` and `remove_namespace()` use raw `std::string::find()` to locate `namespace <lang>` in file content. This matches namespace text inside comments (e.g., `// namespace py3 test.bar`), producing wrong offsets. Add `find_not_in_comment()` helper that wraps `std::string::find()` and skips matches inside line comments (`//`, `#`) and block comments (`/* ... */`). Reviewed By: hchokshi Differential Revision: D93071947 fbshipit-source-id: 88d56b21480bd2794535504a804afaf6ee61950d
1 parent ce3c660 commit d4d355e

File tree

2 files changed

+98
-6
lines changed

2 files changed

+98
-6
lines changed

thrift/compiler/codemod/annotate_allow_legacy_missing_uris_test.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import os
1818
import shutil
19+
import sys
1920
import tempfile
2021
import textwrap
2122
import unittest
@@ -42,10 +43,23 @@ def _check(self, *, before: str, after: str) -> None:
4243
binary = pkg_resources.resource_filename(__name__, "codemod")
4344
run_binary(binary, "foo.thrift")
4445

45-
self.assertEqual(
46-
read_file("foo.thrift"),
47-
textwrap.dedent(after),
48-
)
46+
expected = textwrap.dedent(after)
47+
actual = read_file("foo.thrift")
48+
49+
if actual != expected:
50+
print(
51+
f"""
52+
EXPECTED: [
53+
{expected}
54+
],
55+
ACTUAL: [
56+
{actual}
57+
]""",
58+
file=sys.stderr,
59+
flush=True,
60+
)
61+
62+
self.assertEqual(actual, expected)
4963

5064
def test_adds_annotation_package_and_include(self):
5165
self._check(
@@ -307,3 +321,26 @@ def test_no_annotation_for_typedef(self):
307321
typedef string MyString
308322
""",
309323
)
324+
325+
def test_commented_namespace_needs_include(self):
326+
"""Test that commented namespace directives are correctly ignored when determining where to add includes and package annotations."""
327+
self._check(
328+
before="""\
329+
// namespace py3 test.bar
330+
namespace py3 test.foo
331+
332+
struct TestStruct {}
333+
""",
334+
after="""\
335+
// namespace py3 test.bar
336+
include "thrift/annotation/thrift.thrift"
337+
338+
339+
@thrift.AllowLegacyMissingUris
340+
package;
341+
342+
namespace py3 test.foo
343+
344+
struct TestStruct {}
345+
""",
346+
)

thrift/compiler/codemod/file_manager.cc

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,61 @@ size_t get_first_character_of_this_line(
5656
}
5757
}
5858

59+
/**
60+
* Returns true if `pos` falls inside any comment: line comments (`//`, `#`)
61+
* or block comments. Block comments do not nest in Thrift.
62+
*/
63+
bool is_inside_comment(const std::string& content, size_t pos) {
64+
// Scans content from the beginning up to `pos`, tracking whether we are
65+
// inside a block comment while also detecting line comments.
66+
bool inside_block = false;
67+
for (size_t i = 0; i < pos && i < content.size(); ++i) {
68+
char c = content[i];
69+
// Detect line comments (only outside block comments).
70+
if (!inside_block &&
71+
((c == '/' && i + 1 < content.size() && content[i + 1] == '/') ||
72+
c == '#')) {
73+
size_t newline = content.find('\n', i);
74+
if (newline == std::string::npos || newline >= pos) {
75+
// `pos` is on this comment line.
76+
return true;
77+
}
78+
i = newline; // loop increment will advance past '\n'
79+
continue;
80+
}
81+
if (!inside_block && c == '/' && i + 1 < content.size() &&
82+
content[i + 1] == '*') {
83+
inside_block = true;
84+
++i; // skip '*'
85+
} else if (
86+
inside_block && c == '*' && i + 1 < content.size() &&
87+
content[i + 1] == '/') {
88+
inside_block = false;
89+
++i; // skip '/'
90+
}
91+
}
92+
return inside_block;
93+
}
94+
95+
/**
96+
* Like `std::string::find()`, but skips matches that appear inside comments
97+
* (line comments starting with `//` or `#`, or block comments).
98+
*/
99+
size_t find_not_in_comment(
100+
const std::string& content, const std::string& pattern, size_t start_pos) {
101+
size_t pos = start_pos;
102+
while (true) {
103+
pos = content.find(pattern, pos);
104+
if (pos == std::string::npos) {
105+
return std::string::npos;
106+
}
107+
if (!is_inside_comment(content, pos)) {
108+
return pos;
109+
}
110+
pos += pattern.size();
111+
}
112+
}
113+
59114
} // namespace
60115

61116
std::string file_manager::get_new_content() const {
@@ -284,7 +339,7 @@ void file_manager::remove_namespace(std::string language) {
284339
program_->namespaces().find(language) != program_->namespaces().end()) {
285340
// get offsets for the namespace statement
286341
auto ns_stmt = fmt::format("namespace {} ", language);
287-
auto begin_offset = old_content_.find(ns_stmt, 0);
342+
size_t begin_offset = find_not_in_comment(old_content_, ns_stmt, 0);
288343
size_t end_offset = old_content_.length();
289344
if (begin_offset != std::string::npos) {
290345
end_offset = old_content_.find('\n', begin_offset);
@@ -305,7 +360,7 @@ std::optional<size_t> file_manager::get_first_namespace_offset() const {
305360
// Finds the offset of first namespace statement in the file.
306361
for (const auto& [lang, _] : program_->namespaces()) {
307362
const auto ns_stmt = "namespace " + lang;
308-
const auto offset = old_content_.find(ns_stmt, 0);
363+
const size_t offset = find_not_in_comment(old_content_, ns_stmt, 0);
309364
if (offset != std::string::npos && min_offset > offset) {
310365
min_offset = offset;
311366
}

0 commit comments

Comments
 (0)