diff --git a/pkg/aws/cloudwatch/histograms/README.md b/pkg/aws/cloudwatch/histograms/README.md
new file mode 100644
index 0000000000000..f1f40dd309c1a
--- /dev/null
+++ b/pkg/aws/cloudwatch/histograms/README.md
@@ -0,0 +1,14 @@
+# Histograms
+
+This package holds common CloudWatch histogram functionality for AWS-owned OpenTelemetry components.
+
+## Visualize histogram mappings
+
+1. Remove `t.Skip(...)` from `TestWriteInputHistograms` and run the test to generate JSON files for the input histograms.
+1. Remove `t.Skip(...)` from `TestWriteConvertedHistograms` and run the test to generate JSON files for the converted histograms.
+1. Run `histogram_mappings.py` to generate the visualizations:
+
+```bash
+pip install matplotlib numpy
+python histogram_mappings.py
+```
diff --git a/pkg/aws/cloudwatch/histograms/conversion.go b/pkg/aws/cloudwatch/histograms/conversion.go
new file mode 100644
index 0000000000000..c2652e1eb16bb
--- /dev/null
+++ b/pkg/aws/cloudwatch/histograms/conversion.go
@@ -0,0 +1,317 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package histograms // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws/cloudwatch/histograms"
+
+import (
+    "math"
+
+    "go.opentelemetry.io/collector/pdata/pmetric"
+
+    "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws/cloudwatch"
+)
+
+type ExponentialMapping struct {
+    maximum     float64
+    minimum     float64
+    sampleCount float64
+    sum         float64
+    values      []float64
+    counts      []float64
+}
+
+var _ (cloudwatch.HistogramDataPoint) = (*ExponentialMapping)(nil)
+
+// ConvertOTelToCloudWatch converts an OpenTelemetry histogram datapoint to a CloudWatch histogram datapoint using
+// exponential mapping.
+func ConvertOTelToCloudWatch(dp pmetric.HistogramDataPoint) cloudwatch.HistogramDataPoint {
+    // maximumInnerBucketCount is the maximum number of inner buckets used to represent each outer bucket.
+    //
+    // A larger value increases the resolution at which the data is sub-sampled, but it also increases memory
+    // allocation, processing time, and the maximum number of value/count pairs sent to CloudWatch, which could
+    // cause a CloudWatch PutMetricData / PutLogEvent request to be split into multiple requests due to the 100/150
+    // metric datapoint limit.
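+    //
+    // For example, with a value of 10, an outer bucket containing 1,000 samples is still represented by at most 10
+    // value/count pairs, while an outer bucket containing only 3 samples uses at most 3.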
+    const maximumInnerBucketCount = 10
+
+    // No validations - assuming valid input histogram
+
+    em := &ExponentialMapping{
+        maximum:     dp.Max(),
+        minimum:     dp.Min(),
+        sampleCount: float64(dp.Count()),
+        sum:         dp.Sum(),
+    }
+
+    // bounds specifies the boundaries between buckets
+    // bucketCounts specifies the number of datapoints in each bucket
+    // there is always one more bucket count than there are boundaries:
+    // len(bucketCounts) = len(bounds) + 1
+    bounds := dp.ExplicitBounds()
+    lenBounds := bounds.Len()
+    bucketCounts := dp.BucketCounts()
+    lenBucketCounts := bucketCounts.Len()
+
+    // Special case: no boundaries implies a single bucket
+    if lenBounds == 0 {
+        em.counts = append(em.counts, float64(bucketCounts.At(0))) // recall that len(bucketCounts) = len(bounds)+1
+        switch {
+        case dp.HasMax() && dp.HasMin():
+            em.values = append(em.values, em.minimum/2.0+em.maximum/2.0)
+        case dp.HasMax():
+            em.values = append(em.values, em.maximum) // only data point we have is the maximum
+        case dp.HasMin():
+            em.values = append(em.values, em.minimum) // only data point we have is the minimum
+        default:
+            em.values = append(em.values, 0) // arbitrary value
+        }
+        return em
+    }
+
+    // To create inner buckets, all outer buckets need to have defined boundaries. The first and last buckets use the
+    // min and max as their lower and upper bounds, respectively. The min and max are optional on the OTel datapoint.
+    // When min and max are not defined, make a reasonable assumption about what the min/max could be.
+    if !dp.HasMin() {
+        // Find the first bucket which contains some data points. The min must be in that bucket.
+        minBucketIdx := 0
+        for i := 0; i < lenBucketCounts; i++ {
+            if bucketCounts.At(i) > 0 {
+                minBucketIdx = i
+                break
+            }
+        }
+
+        // Take the lower bound of the bucket. The lower bound of bucket index n is boundary index n-1.
+        if minBucketIdx != 0 {
+            em.minimum = bounds.At(minBucketIdx - 1)
+        } else {
+            bucketWidth := 0.001 // arbitrary width - with only a single boundary there is no bucket width to infer from
+            if lenBounds > 1 {
+                bucketWidth = bounds.At(1) - bounds.At(0)
+            }
+            em.minimum = bounds.At(0) - bucketWidth
+
+            // If all boundaries are positive, assume all data is positive. This covers use cases where Prometheus
+            // histogram metrics for non-zero values like request durations have their first bucket start at 0. For
+            // these metrics, a negative minimum would cause percentile metrics to be unavailable.
+            if bounds.At(0) >= 0 {
+                em.minimum = max(em.minimum, 0.0)
+            }
+        }
+    }
+
+    if !dp.HasMax() {
+        // Find the last bucket with some data in it. The max must be in that bucket.
+        maxBucketIdx := lenBounds - 1
+        for i := lenBucketCounts - 1; i >= 0; i-- {
+            if bucketCounts.At(i) > 0 {
+                maxBucketIdx = i
+                break
+            }
+        }
+
+        // We want the upper bound of the bucket. The upper bound of bucket index n is boundary index n.
+        if maxBucketIdx <= lenBounds-1 {
+            em.maximum = bounds.At(maxBucketIdx)
+        } else {
+            bucketWidth := 0.01 // arbitrary width - with only a single boundary there is no bucket width to infer from
+            if lenBounds > 1 {
+                bucketWidth = bounds.At(lenBounds-1) - bounds.At(lenBounds-2)
+            }
+            em.maximum = bounds.At(lenBounds-1) + bucketWidth
+        }
+    }
+
+    // Pre-calculate total output size to avoid dynamic growth
+    totalOutputSize := 0
+    for i := 0; i < lenBucketCounts; i++ {
+        sampleCount := bucketCounts.At(i)
+        if sampleCount > 0 {
+            totalOutputSize += int(min(sampleCount, maximumInnerBucketCount))
+        }
+    }
+    if totalOutputSize == 0 {
+        // No samples in any bucket
+        return em
+    }
+
+    em.values = make([]float64, 0, totalOutputSize)
+    em.counts = make([]float64, 0, totalOutputSize)
+
+    for i := 0; i < lenBucketCounts; i++ {
+        sampleCount := int(bucketCounts.At(i))
+        if sampleCount == 0 {
+            // No need to operate on a bucket with no samples
+            continue
+        }
+
+        lowerBound := em.minimum
+        if i > 0 {
+            lowerBound = bounds.At(i - 1)
+        }
+        upperBound := em.maximum
+        if i < lenBucketCounts-1 {
+            upperBound = bounds.At(i)
+        }
+
+        // This algorithm creates "inner buckets" within each user-defined bucket based on its sample count, up to a
+        // maximum. A logarithmic ratio (named "magnitude") compares the density of the current bucket against the
+        // next bucket. This logarithmic ratio is used to decide how to spread samples amongst the inner buckets.
+        //
+        // case 1: magnitude < 0
+        //   * What this means: The current bucket is denser than the next bucket -> density is decreasing.
+        //   * What we do: Use an inverse quadratic distribution to spread the samples. This allocates more samples
+        //     towards the lower bound of the bucket.
+        // case 2: 0 <= magnitude < 1
+        //   * What this means: The current bucket and the next bucket have similar densities -> density is not
+        //     changing much.
+        //   * What we do: Use a uniform distribution to spread the samples. Extra samples that can't be spread evenly
+        //     are (arbitrarily) allocated towards the start of the bucket.
+        // case 3: 1 <= magnitude
+        //   * What this means: The current bucket is less dense than the next bucket -> density is increasing.
+        //   * What we do: Use a quadratic distribution to spread the samples. This allocates more samples towards the
+        //     end of the bucket.
+        //
+        // As a small optimization, we omit the logarithm invocation and compare the raw ratio against adjusted
+        // thresholds instead.
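+        //
+        // For example, an outer bucket spanning [0, 10) that holds 25 samples is split into
+        // min(25, maximumInnerBucketCount) = 10 inner buckets of width 1, and the 25 samples are spread across those
+        // inner buckets according to whichever distribution is selected below.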
+        ratio := 0.0
+        if i < lenBucketCounts-1 {
+            nextSampleCount := bucketCounts.At(i + 1)
+            // If the next bucket is empty, then the density is surely decreasing
+            if nextSampleCount == 0 {
+                ratio = 0.0
+            } else {
+                var nextUpperBound float64
+                if i+1 == lenBucketCounts-1 {
+                    nextUpperBound = em.maximum
+                } else {
+                    nextUpperBound = bounds.At(i + 1)
+                }
+
+                // original calculations for reference:
+                // currentBucketDensity := float64(sampleCount) / (upperBound - lowerBound)
+                // nextBucketDensity := float64(nextSampleCount) / (nextUpperBound - upperBound)
+                // ratio = nextBucketDensity / currentBucketDensity
+                //
+                // the following calculations are equivalent but improve speed by ~1% in benchmark tests
+                denom := (nextUpperBound - upperBound) * float64(sampleCount)
+                numerator := (upperBound - lowerBound) * float64(nextSampleCount)
+                ratio = numerator / denom
+            }
+        }
+
+        // innerBucketCount is how many "inner buckets" to spread the sample count amongst
+        innerBucketCount := min(sampleCount, maximumInnerBucketCount)
+        delta := (upperBound - lowerBound) / float64(innerBucketCount)
+
+        switch {
+        case ratio < 1.0/math.E: // magnitude < 0: Use -yx^2 (inverse quadratic)
+            sigma := float64(sumOfSquares(innerBucketCount))
+            epsilon := float64(sampleCount) / sigma
+            entryStart := len(em.counts)
+
+            runningSum := 0
+            for j := 0; j < innerBucketCount; j++ {
+                innerBucketSampleCount := epsilon * float64((j-innerBucketCount)*(j-innerBucketCount))
+                innerBucketSampleCountAdjusted := int(math.Floor(innerBucketSampleCount))
+                if innerBucketSampleCountAdjusted > 0 {
+                    runningSum += innerBucketSampleCountAdjusted
+                    em.values = append(em.values, lowerBound+delta*float64(j+1))
+                    em.counts = append(em.counts, float64(innerBucketSampleCountAdjusted))
+                }
+            }
+
+            // distribute the remainder towards the front
+            remainder := sampleCount - runningSum
+            // make sure there's room for the remainder; the flooring above may have produced fewer entries than needed
+            if len(em.counts) < entryStart+remainder {
+                em.counts = append(em.counts, make([]float64, remainder)...)
+                em.values = append(em.values, make([]float64, remainder)...)
+            }
+            for j := 0; j < remainder; j++ {
+                em.counts[entryStart]++
+                entryStart++
+            }
+        case ratio < math.E: // 0 <= magnitude < 1: Use x
+            // Distribute samples evenly with integer counts
+            baseCount := sampleCount / innerBucketCount
+            remainder := sampleCount % innerBucketCount
+            for j := 1; j <= innerBucketCount; j++ {
+                count := baseCount
+
+                // Distribute remainder to first few buckets
+                if j <= remainder {
+                    count++
+                }
+                em.values = append(em.values, lowerBound+delta*float64(j))
+                em.counts = append(em.counts, float64(count))
+            }
+        default: // magnitude >= 1: Use yx^2 (quadratic)
+            sigma := float64(sumOfSquares(innerBucketCount))
+            epsilon := float64(sampleCount) / sigma
+            entryStart := len(em.counts)
+
+            runningSum := 0
+            for j := 1; j <= innerBucketCount; j++ {
+                innerBucketSampleCount := epsilon * float64(j*j)
+                innerBucketSampleCountAdjusted := int(math.Floor(innerBucketSampleCount))
+                if innerBucketSampleCountAdjusted > 0 {
+                    runningSum += innerBucketSampleCountAdjusted
+                    em.values = append(em.values, lowerBound+delta*float64(j))
+                    em.counts = append(em.counts, float64(innerBucketSampleCountAdjusted))
+                }
+            }
+
+            // distribute the remainder towards the end
+            remainder := sampleCount - runningSum
+            // make sure there's room for the remainder; the flooring above may have produced fewer entries than needed
+            if len(em.counts) < entryStart+remainder {
+                em.counts = append(em.counts, make([]float64, remainder)...)
+                em.values = append(em.values, make([]float64, remainder)...)
+ } + entryStart = len(em.counts) - 1 + for j := 0; j < remainder; j++ { + em.counts[entryStart]++ + entryStart-- + } + } + } + + // Move last entry to maximum if needed + if dp.HasMax() && len(em.values) > 0 { + lastIdx := len(em.values) - 1 + for i := lastIdx; i >= 0; i-- { + if em.counts[i] > 0 { + lastIdx = i + break + } + } + em.values[lastIdx] = em.maximum + em.values = em.values[:lastIdx+1] + em.counts = em.counts[:lastIdx+1] + } + + return em +} + +func (em *ExponentialMapping) ValuesAndCounts() ([]float64, []float64) { + return em.values, em.counts +} + +func (em *ExponentialMapping) Minimum() float64 { + return em.minimum +} + +func (em *ExponentialMapping) Maximum() float64 { + return em.maximum +} + +func (em *ExponentialMapping) SampleCount() float64 { + return em.sampleCount +} + +func (em *ExponentialMapping) Sum() float64 { + return em.sum +} + +// sumOfSquares is a closed form calculation of Σx^2, for 1 to n +func sumOfSquares(n int) int { + return n * (n + 1) * (2*n + 1) / 6 +} diff --git a/pkg/aws/cloudwatch/histograms/conversion_test.go b/pkg/aws/cloudwatch/histograms/conversion_test.go new file mode 100644 index 0000000000000..2a43594a2ed82 --- /dev/null +++ b/pkg/aws/cloudwatch/histograms/conversion_test.go @@ -0,0 +1,304 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package histograms // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws/cloudwatch/histograms" + +import ( + "encoding/csv" + "encoding/json" + "fmt" + "math" + "os" + "sort" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws/cloudwatch" +) + +var filenameReplacer = strings.NewReplacer( + " ", "_", + "/", "_", +) + +func TestWriteInputHistograms(t *testing.T) { + t.Skip("only used to create test data for visualization") + for _, tc := range TestCases() { + jsonData, err := json.MarshalIndent(tc.Input, "", " ") + require.NoError(t, err) + _ = os.Mkdir("testdata/input", os.ModePerm) + require.NoError(t, os.WriteFile("testdata/input/"+filenameReplacer.Replace(tc.Name)+".json", jsonData, 0o600)) + } +} + +func TestWriteConvertedHistograms(t *testing.T) { + t.Skip("only used to create test data for visualization") + for _, tc := range TestCases() { + t.Run(tc.Name, func(t *testing.T) { + dp := setupDatapoint(tc.Input) + dist := ConvertOTelToCloudWatch(dp) + _ = os.Mkdir("testdata/exponential", os.ModePerm) + assert.NoError(t, writeValuesAndCountsToJSON(dist, "testdata/exponential/"+filenameReplacer.Replace(tc.Name+".json"))) + }) + } +} + +func TestConvertOTelToCloudWatch(t *testing.T) { + for _, tc := range TestCases() { + t.Run(tc.Name, func(t *testing.T) { + dp := setupDatapoint(tc.Input) + dist := ConvertOTelToCloudWatch(dp) + verifyDist(t, dist, tc.Expected) + }) + } + + t.Run("accuracy test - lognormal", func(t *testing.T) { + verifyDistAccuracy(t, ConvertOTelToCloudWatch, "testdata/lognormal_10000.csv") + }) + + t.Run("accuracy test - weibull", func(t *testing.T) { + verifyDistAccuracy(t, ConvertOTelToCloudWatch, "testdata/weibull_10000.csv") + }) +} + +func BenchmarkLogNormal(b *testing.B) { + // arrange + boundaries := []float64{ + 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, + 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.02, + 0.021, 0.022, 0.023, 0.024, 0.025, 0.026, 0.027, 0.028, 0.029, 0.03, + 0.031, 0.032, 
0.033, 0.034, 0.035, 0.036, 0.037, 0.038, 0.039, 0.04, + 0.041, 0.042, 0.043, 0.044, 0.045, 0.046, 0.047, 0.048, 0.049, 0.05, + 0.1, 0.2, + } + + data, err := loadCsvData("testdata/lognormal_10000.csv") + require.NoError(b, err) + require.Len(b, data, 10000) + + dp := createHistogramDatapointFromData(data, boundaries) + require.Equal(b, 10000, int(dp.Count())) + + b.Run("NewExponentialMappingCWFromOtel", func(b *testing.B) { + for i := 0; i < b.N; i++ { + dist := ConvertOTelToCloudWatch(dp) + values, counts := dist.ValuesAndCounts() + assert.NotNil(b, values) + assert.NotNil(b, counts) + } + }) +} + +func BenchmarkWeibull(b *testing.B) { + // arrange + boundaries := []float64{ + 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, + 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.02, + 0.021, 0.022, 0.023, 0.024, 0.025, 0.026, 0.027, 0.028, 0.029, 0.03, + 0.031, 0.032, 0.033, 0.034, 0.035, 0.036, 0.037, 0.038, 0.039, 0.04, + 0.041, 0.042, 0.043, 0.044, 0.045, 0.046, 0.047, 0.048, 0.049, 0.05, + 0.1, 0.2, + } + + data, err := loadCsvData("testdata/weibull_10000.csv") + require.NoError(b, err) + require.Len(b, data, 10000) + + dp := createHistogramDatapointFromData(data, boundaries) + require.Equal(b, 10000, int(dp.Count())) + + b.Run("NewExponentialMappingCWFromOtel", func(b *testing.B) { + for i := 0; i < b.N; i++ { + dist := ConvertOTelToCloudWatch(dp) + values, counts := dist.ValuesAndCounts() + assert.NotNil(b, values) + assert.NotNil(b, counts) + } + }) +} + +func setupDatapoint(input HistogramInput) pmetric.HistogramDataPoint { + dp := pmetric.NewHistogramDataPoint() + dp.SetCount(input.Count) + dp.SetSum(input.Sum) + if input.Min != nil { + dp.SetMin(*input.Min) + } + if input.Max != nil { + dp.SetMax(*input.Max) + } + dp.ExplicitBounds().FromRaw(input.Boundaries) + dp.BucketCounts().FromRaw(input.Counts) + return dp +} + +func verifyDist(t *testing.T, dist cloudwatch.HistogramDataPoint, expected ExpectedMetrics) { + if expected.Min != nil { + assert.Equal(t, *expected.Min, dist.Minimum(), "min does not match expected") + } + if expected.Max != nil { + assert.Equal(t, *expected.Max, dist.Maximum(), "max does not match expected") + } + assert.Equal(t, int(expected.Count), int(dist.SampleCount()), "samplecount does not match expected") + assert.Equal(t, expected.Sum, dist.Sum(), "sum does not match expected") + + values, counts := dist.ValuesAndCounts() + + calculatedCount := 0.0 + for _, count := range counts { + calculatedCount += count + // fmt.Printf("%7.2f = %4d (%d)\n", values[i], int(counts[i]), calculatedCount) + } + assert.InDelta(t, float64(expected.Count), calculatedCount, 1e-6, "calculated count does not match expected") + + for p, r := range expected.PercentileRanges { + x := int(math.Round(float64(dist.SampleCount()) * p)) + + soFar := 0 + for i, count := range counts { + soFar += int(count) + if soFar >= x { + // fmt.Printf("Found p%.f at bucket %0.2f. 
Expected range: %+v\n", p*100, values[i], r) + assert.GreaterOrEqual(t, values[i], r.Low, "percentile %0.2f", p) + assert.LessOrEqual(t, values[i], r.High, "percentile %0.2f", p) + break + } + } + } +} + +func loadCsvData(filename string) ([]float64, error) { + file, err := os.Open(filename) + if err != nil { + return nil, err + } + defer file.Close() + + reader := csv.NewReader(file) + records, err := reader.ReadAll() + if err != nil { + return nil, err + } + + var data []float64 + for _, value := range records[0] { + f, err := strconv.ParseFloat(strings.TrimSpace(value), 64) + if err != nil { + return nil, err + } + data = append(data, f) + } + return data, nil +} + +func createHistogramDatapointFromData(data []float64, boundaries []float64) pmetric.HistogramDataPoint { + dp := pmetric.NewHistogramDataPoint() + + // Calculate basic stats + var sum float64 + minimum := math.Inf(1) + maximum := math.Inf(-1) + + for _, v := range data { + sum += v + if v < minimum { + minimum = v + } + if v > maximum { + maximum = v + } + } + + dp.SetCount(uint64(len(data))) + dp.SetSum(sum) + dp.SetMin(minimum) + dp.SetMax(maximum) + + // Create bucket counts + bucketCounts := make([]uint64, len(boundaries)+1) + + for _, v := range data { + bucket := sort.SearchFloat64s(boundaries, v) + bucketCounts[bucket]++ + } + + dp.ExplicitBounds().FromRaw(boundaries) + dp.BucketCounts().FromRaw(bucketCounts) + + return dp +} + +func verifyDistAccuracy(t *testing.T, newDistFunc func(pmetric.HistogramDataPoint) cloudwatch.HistogramDataPoint, filename string) { + // arrange + percentiles := []float64{0.1, 0.25, 0.5, 0.75, 0.9, 0.99, 0.999} + boundaries := []float64{ + 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, + 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.02, + 0.021, 0.022, 0.023, 0.024, 0.025, 0.026, 0.027, 0.028, 0.029, 0.03, + 0.031, 0.032, 0.033, 0.034, 0.035, 0.036, 0.037, 0.038, 0.039, 0.04, + 0.041, 0.042, 0.043, 0.044, 0.045, 0.046, 0.047, 0.048, 0.049, 0.05, + 0.1, 0.2, + } + + data, err := loadCsvData(filename) + require.NoError(t, err) + assert.Len(t, data, 10000) + + dp := createHistogramDatapointFromData(data, boundaries) + assert.Equal(t, 10000, int(dp.Count())) + calculatedTotal := 0 + for _, count := range dp.BucketCounts().All() { + calculatedTotal += int(count) + } + assert.Equal(t, 10000, calculatedTotal) + + // act + dist := newDistFunc(dp) + values, counts := dist.ValuesAndCounts() + + // assert + calculatedCount := 0.0 + for _, count := range counts { + calculatedCount += count + } + assert.InDelta(t, 10000, calculatedCount, 1e-6, "calculated count does not match expected") + + for _, p := range percentiles { + x1 := int(math.Round(float64(dp.Count()) * p)) + x2 := int(math.Round(calculatedCount * p)) + + exactPercentileValue := data[x1] + + soFar := 0 + for i, count := range counts { + soFar += int(count) + if soFar >= x2 { + calculatedPercentileValue := values[i] + errorPercent := (exactPercentileValue - calculatedPercentileValue) / exactPercentileValue * 100 + fmt.Printf("P%.1f: exact=%.6f, calculated=%.6f, error=%.2f%%\n", p*100, exactPercentileValue, calculatedPercentileValue, errorPercent) + break + } + } + } +} + +func writeValuesAndCountsToJSON(dist cloudwatch.HistogramDataPoint, filename string) error { + values, counts := dist.ValuesAndCounts() + + data := make(map[string]any) + data["values"] = values + data["counts"] = counts + data["sum"] = dist.Sum() + + jsonData, err := json.MarshalIndent(data, "", " ") + if err != nil { + return err + 
} + + return os.WriteFile(filename, jsonData, 0o600) +} diff --git a/pkg/aws/cloudwatch/histograms/testdata/.gitignore b/pkg/aws/cloudwatch/histograms/testdata/.gitignore new file mode 100644 index 0000000000000..0f0d6bdaf849c --- /dev/null +++ b/pkg/aws/cloudwatch/histograms/testdata/.gitignore @@ -0,0 +1,3 @@ +comparisons/ +exponential/ +input/ \ No newline at end of file diff --git a/pkg/aws/cloudwatch/histograms/testdata/histogram_mappings.py b/pkg/aws/cloudwatch/histograms/testdata/histogram_mappings.py new file mode 100644 index 0000000000000..6fa92abf6be25 --- /dev/null +++ b/pkg/aws/cloudwatch/histograms/testdata/histogram_mappings.py @@ -0,0 +1,169 @@ +import argparse +import json +import math +import matplotlib.pyplot as plt +import numpy as np +import os +import pdb + +from pathlib import Path +from typing import Dict, List, Tuple + +def plot_input_histogram(data, ax, title: str, color: str): + """Plot input histogram using exact bucket boundaries.""" + boundaries = data.get('Boundaries', []) + counts = data['Counts'] + min_val = data.get('Min') + max_val = data.get('Max') + summ = data.get('Sum') + total_count = sum(counts) + + # Handle case with no boundaries (single bucket) + if not boundaries or len(boundaries) == 0: + if min_val is not None and max_val is not None: + left_edges = [min_val] + widths = [max_val - min_val] + else: + # Use arbitrary range if no min/max + left_edges = [-10] + widths = [20] + else: + # Calculate exact bucket edges and widths + left_edges = [] + widths = [] + + for i in range(len(counts)): + if i == 0: + # First bucket: from min to first boundary + left = min_val if min_val is not None else boundaries[0] - (boundaries[1] - boundaries[0]) if len(boundaries) > 1 else boundaries[0] - 10 + right = boundaries[0] + elif i == len(counts) - 1: + # Last bucket: from last boundary to max + left = boundaries[i-1] + right = max_val if max_val is not None else boundaries[i-1] + (boundaries[i-1] - boundaries[i-2]) if len(boundaries) > 1 else boundaries[i-1] + 10 + else: + # Middle buckets: between boundaries + left = boundaries[i-1] + right = boundaries[i] + + left_edges.append(left) + widths.append(right - left) + + ax.bar(left_edges, counts, width=widths, alpha=0.7, edgecolor='black', linewidth=0.8, color=color, align='edge') + ax.set_title(f'{title} (Count: {total_count}, Sum: {summ})') + ax.set_ylabel('Counts') + ax.grid(True, alpha=0.3) + +def plot_cw_histogram_bars(histogram: Dict[float, float], histogram_min: float, histogram_max: float, histogram_sum: float, ax, title: str, color: str): + """Plot histogram bars on given axes.""" + values = sorted(histogram.keys()) + counts = [histogram[v] for v in values] + total_count = sum(counts) + + if len(values) == 1: + # Single bar case + width = (histogram_max - histogram_min) * 0.8 + ax.bar(values, counts, width=width, alpha=0.7, edgecolor='black', linewidth=1.5, color=color) + else: + # Calculate minimum gap to prevent overlaps + gaps = [values[i+1] - values[i] for i in range(len(values)-1)] + min_gap = min(gaps) + max_width = min_gap * 0.8 # Use 80% of minimum gap + + widths = [] + for i in range(len(values)): + if i == 0: + # First bar: extend to histogram_min or use half-gap to next + left_space = values[0] - histogram_min + right_space = (values[1] - values[0]) / 2 if len(values) > 1 else (histogram_max - values[0]) + width = min(left_space + right_space, max_width) + elif i == len(values) - 1: + # Last bar: extend to histogram_max or use half-gap from previous + left_space = (values[i] - values[i-1]) / 2 
+ right_space = histogram_max - values[i] + width = min(left_space + right_space, max_width) + else: + # Middle bars: use half-gaps on both sides + left_space = (values[i] - values[i-1]) / 2 + right_space = (values[i+1] - values[i]) / 2 + width = min(left_space + right_space, max_width) + + widths.append(width) + + ax.bar(values, counts, width=widths, alpha=0.7, edgecolor='black', linewidth=0.8, color=color) + + ax.scatter(values, counts, color='red', s=50, zorder=5) + ax.set_title(f'{title} (Count: {total_count}, Sum: {histogram_sum})') + ax.set_ylabel('Counts') + ax.grid(True, alpha=0.3) + +def load_json_data(filepath): + """Load histogram data from JSON file.""" + with open(filepath, 'r') as f: + data = json.load(f) + return data['values'], data['counts'], data['sum'] + +def load_input_histogram(filepath): + """Load input histogram format.""" + with open(filepath, 'r') as f: + data = json.load(f) + return data + +def plot_all_folders_comparison(json_filename): + """Plot the same JSON file from all folders for comparison.""" + base_path = Path('.') + folders = ['input', 'exponential'] + colors = ['black', 'green'] + + fig, ax = plt.subplots(len(folders), 1, figsize=(12, 20)) + + i = -1 + for folder, color in zip(folders, colors): + i += 1 + filepath = base_path / folder / (json_filename+".json") + if filepath.exists(): + try: + if folder == 'input': + data = load_input_histogram(filepath) + plot_input_histogram(data, ax[i], f'{folder.capitalize()} Mapping', color) + else: + values, counts, summ = load_json_data(filepath) + if not values: # Skip if no values + continue + hist = {values[j]: counts[j] for j in range(len(values))} + plot_cw_histogram_bars(hist, min(values), max(values), summ, ax[i], f'{folder.capitalize()} Mapping', color) + except Exception as e: + print(f"Error processing {filepath}: {e}") + + plt.tight_layout() + plt.savefig(f"comparisons/{json_filename}.png", dpi=300, bbox_inches='tight') + plt.show() + +# Example usage +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Process histogram mappings') + parser.add_argument('dataset', nargs='?', help='Optional dataset name to process') + args = parser.parse_args() + + os.makedirs('comparisons', exist_ok=True) + + input_path = Path('./input') + if input_path.exists(): + if args.dataset: + # Process specific dataset if provided + dataset_file = input_path / f"{args.dataset}.json" + if dataset_file.exists(): + print(f"Processing {args.dataset}...") + plot_all_folders_comparison(args.dataset) + else: + print(f"Dataset '{args.dataset}' not found in input folder.") + else: + # Process all datasets if no specific dataset provided + json_files = [f.stem for f in input_path.iterdir() if f.suffix == '.json'] + for json_file in json_files: + print(f"Processing {json_file}...") + plot_all_folders_comparison(json_file) + else: + print("Input folder not found.") + +