|  | 
|  | 1 | +use std::ops::ControlFlow; | 
|  | 2 | + | 
|  | 3 | +use clippy_utils::diagnostics::span_lint_hir_and_then; | 
|  | 4 | +use clippy_utils::ty::is_type_lang_item; | 
|  | 5 | +use clippy_utils::visitors::for_each_expr; | 
|  | 6 | +use clippy_utils::{eq_expr_value, higher, path_to_local_id}; | 
|  | 7 | +use rustc_errors::{Applicability, MultiSpan}; | 
|  | 8 | +use rustc_hir::{Expr, ExprKind, LangItem, Node, Pat, PatKind}; | 
|  | 9 | +use rustc_lint::LateContext; | 
|  | 10 | +use rustc_middle::ty::Ty; | 
|  | 11 | +use rustc_span::{Span, sym}; | 
|  | 12 | + | 
|  | 13 | +use super::CHARS_ENUMERATE_FOR_BYTE_INDICES; | 
|  | 14 | + | 
// Names of the `str` methods taking a `usize` (or range of `usize`) that is interpreted as a byte
// index; these are the method calls this lint inspects.
//
// `String` has byte-index methods of its own, but every one of them takes `&mut self`, so they
// cannot be called while the `chars()` iterator still borrows the string and are not listed here.
const BYTE_INDEX_METHODS: &[&str] = &[
    // Char-boundary queries
    "is_char_boundary",
    "floor_char_boundary",
    "ceil_char_boundary",
    // Indexing and slicing
    "index",
    "index_mut",
    "get",
    "get_mut",
    "get_unchecked",
    "get_unchecked_mut",
    "slice_unchecked",
    "slice_mut_unchecked",
    // Splitting
    "split_at",
    "split_at_mut",
    "split_at_checked",
    "split_at_mut_checked",
];
|  | 36 | + | 
/// Value returned from the `for_each_expr` closures in this module to keep visiting.
/// The break type is `!`, so these visitors never stop early.
const CONTINUE: ControlFlow<!, ()> = ControlFlow::Continue(());
|  | 38 | + | 
|  | 39 | +pub(super) fn check<'tcx>(cx: &LateContext<'tcx>, pat: &Pat<'_>, iterable: &Expr<'_>, body: &'tcx Expr<'tcx>) { | 
|  | 40 | +    if let ExprKind::MethodCall(_, enumerate_recv, _, enumerate_span) = iterable.kind | 
|  | 41 | +        && let Some(method_id) = cx.typeck_results().type_dependent_def_id(iterable.hir_id) | 
|  | 42 | +        && cx.tcx.is_diagnostic_item(sym::enumerate_method, method_id) | 
|  | 43 | +        && let ExprKind::MethodCall(_, chars_recv, _, chars_span) = enumerate_recv.kind | 
|  | 44 | +        && let Some(method_id) = cx.typeck_results().type_dependent_def_id(enumerate_recv.hir_id) | 
|  | 45 | +        && cx.tcx.is_diagnostic_item(sym::str_chars, method_id) | 
|  | 46 | +    { | 
|  | 47 | +        if let PatKind::Tuple([pat, _], _) = pat.kind | 
|  | 48 | +            && let PatKind::Binding(_, binding_id, ..) = pat.kind | 
|  | 49 | +        { | 
|  | 50 | +            // Destructured iterator element `(idx, _)`, look for uses of the binding | 
|  | 51 | +            for_each_expr(cx, body, |expr| { | 
|  | 52 | +                if path_to_local_id(expr, binding_id) { | 
|  | 53 | +                    check_index_usage(cx, expr, pat, enumerate_span, chars_span, chars_recv); | 
|  | 54 | +                } | 
|  | 55 | +                CONTINUE | 
|  | 56 | +            }); | 
|  | 57 | +        } else if let PatKind::Binding(_, binding_id, ..) = pat.kind { | 
|  | 58 | +            // Bound as a tuple, look for `tup.0` | 
|  | 59 | +            for_each_expr(cx, body, |expr| { | 
|  | 60 | +                if let ExprKind::Field(e, field) = expr.kind | 
|  | 61 | +                    && path_to_local_id(e, binding_id) | 
|  | 62 | +                    && field.name == sym::integer(0) | 
|  | 63 | +                { | 
|  | 64 | +                    check_index_usage(cx, expr, pat, enumerate_span, chars_span, chars_recv); | 
|  | 65 | +                } | 
|  | 66 | +                CONTINUE | 
|  | 67 | +            }); | 
|  | 68 | +        } | 
|  | 69 | +    } | 
|  | 70 | +} | 
|  | 71 | + | 
|  | 72 | +fn check_index_usage<'tcx>( | 
|  | 73 | +    cx: &LateContext<'tcx>, | 
|  | 74 | +    expr: &'tcx Expr<'tcx>, | 
|  | 75 | +    pat: &Pat<'_>, | 
|  | 76 | +    enumerate_span: Span, | 
|  | 77 | +    chars_span: Span, | 
|  | 78 | +    chars_recv: &Expr<'_>, | 
|  | 79 | +) { | 
|  | 80 | +    let Some(parent_expr) = index_consumed_at(cx, expr) else { | 
|  | 81 | +        return; | 
|  | 82 | +    }; | 
|  | 83 | + | 
|  | 84 | +    let is_string_like = |ty: Ty<'_>| ty.is_str() || is_type_lang_item(cx, ty, LangItem::String); | 
|  | 85 | +    let message = match parent_expr.kind { | 
|  | 86 | +        ExprKind::MethodCall(segment, recv, ..) | 
|  | 87 | +            if cx.typeck_results().expr_ty_adjusted(recv).peel_refs().is_str() | 
|  | 88 | +                && BYTE_INDEX_METHODS.contains(&segment.ident.name.as_str()) | 
|  | 89 | +                && eq_expr_value(cx, chars_recv, recv) => | 
|  | 90 | +        { | 
|  | 91 | +            "passing a character position to a method that expects a byte index" | 
|  | 92 | +        }, | 
|  | 93 | +        ExprKind::Index(target, ..) | 
|  | 94 | +            if is_string_like(cx.typeck_results().expr_ty_adjusted(target).peel_refs()) | 
|  | 95 | +                && eq_expr_value(cx, chars_recv, target) => | 
|  | 96 | +        { | 
|  | 97 | +            "indexing into a string with a character position where a byte index is expected" | 
|  | 98 | +        }, | 
|  | 99 | +        _ => return, | 
|  | 100 | +    }; | 
|  | 101 | + | 
|  | 102 | +    span_lint_hir_and_then( | 
|  | 103 | +        cx, | 
|  | 104 | +        CHARS_ENUMERATE_FOR_BYTE_INDICES, | 
|  | 105 | +        expr.hir_id, | 
|  | 106 | +        expr.span, | 
|  | 107 | +        message, | 
|  | 108 | +        |diag| { | 
|  | 109 | +            diag.note("a character can take up more than one byte, so they are not interchangeable") | 
|  | 110 | +                .span_note( | 
|  | 111 | +                    MultiSpan::from_spans(vec![pat.span, enumerate_span]), | 
|  | 112 | +                    "position comes from the enumerate iterator", | 
|  | 113 | +                ) | 
|  | 114 | +                .span_suggestion_verbose( | 
|  | 115 | +                    chars_span.to(enumerate_span), | 
|  | 116 | +                    "consider using `.char_indices()` instead", | 
|  | 117 | +                    "char_indices()", | 
|  | 118 | +                    Applicability::MaybeIncorrect, | 
|  | 119 | +                ); | 
|  | 120 | +        }, | 
|  | 121 | +    ); | 
|  | 122 | +} | 
|  | 123 | + | 
|  | 124 | +/// Returns the expression which ultimately consumes the index. | 
|  | 125 | +/// This is usually the parent expression, i.e. `.split_at(idx)` for `idx`, | 
|  | 126 | +/// but for `.get(..idx)` we want to consider the method call the consuming expression, | 
|  | 127 | +/// which requires skipping past the range expression. | 
|  | 128 | +fn index_consumed_at<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) -> Option<&'tcx Expr<'tcx>> { | 
|  | 129 | +    for (_, node) in cx.tcx.hir().parent_iter(expr.hir_id) { | 
|  | 130 | +        match node { | 
|  | 131 | +            Node::Expr(expr) if higher::Range::hir(expr).is_some() => {}, | 
|  | 132 | +            Node::ExprField(_) => {}, | 
|  | 133 | +            Node::Expr(expr) => return Some(expr), | 
|  | 134 | +            _ => break, | 
|  | 135 | +        } | 
|  | 136 | +    } | 
|  | 137 | +    None | 
|  | 138 | +} | 
0 commit comments