@@ -98,9 +98,11 @@ func TestGenerateCacheKey(t *testing.T) {
9898 cmdArgs : []string {"sh" , "ec" , "test" },
9999 image : "python:3.11" ,
100100 want : & cachekey.CacheKey {
101+ // CRITICAL: Artifact identifiers now include URIs in "name@uri" format
102+ // to ensure different data sources produce different cache keys.
101103 InputArtifactNames : map [string ]* cachekey.ArtifactNameList {
102- "dataset_one" : {ArtifactNames : []string {"1" }},
103- "dataset_two" : {ArtifactNames : []string {"2" }},
104+ "dataset_one" : {ArtifactNames : []string {"1@gs://some-bucket/dataset-one " }},
105+ "dataset_two" : {ArtifactNames : []string {"2@gs://some-bucket/dataset-two " }},
104106 },
105107 InputParameterValues : map [string ]* structpb.Value {
106108 "message" : {Kind : & structpb.Value_StringValue {StringValue : "Some string value" }},
@@ -187,8 +189,9 @@ func TestGenerateCacheKey(t *testing.T) {
187189 image : "python:3.11" ,
188190 pvcNames : []string {"workspace-pvc" , "data-pvc" },
189191 want : & cachekey.CacheKey {
192+ // CRITICAL: Artifact identifiers now include URIs in "name@uri" format
190193 InputArtifactNames : map [string ]* cachekey.ArtifactNameList {
191- "dataset_one" : {ArtifactNames : []string {"1" }},
194+ "dataset_one" : {ArtifactNames : []string {"1@gs://some-bucket/dataset-one " }},
192195 },
193196 InputParameterValues : map [string ]* structpb.Value {
194197 "message" : {Kind : & structpb.Value_StringValue {StringValue : "Some string value" }},
@@ -353,6 +356,129 @@ func TestGenerateFingerPrint(t *testing.T) {
353356 }
354357}
355358
359+ // TestGenerateFingerPrint_ConsidersArtifactURIs verifies that different artifact URIs
360+ // produce different fingerprints, even when artifact names are identical.
361+ // This test validates the fix for the critical cache key bug where only artifact names
362+ // (not URIs) were included in the cache key, causing incorrect cache reuse across
363+ // different data sources.
364+ func TestGenerateFingerPrint_ConsidersArtifactURIs (t * testing.T ) {
365+ cacheClient , err := NewClient ("ml-pipeline.kubeflow" , "8887" , false , & tls.Config {})
366+ require .NoError (t , err )
367+
368+ // Create two inputs with same artifact name but different URIs
369+ inputsWithURIA := & pipelinespec.ExecutorInput_Inputs {
370+ Artifacts : map [string ]* pipelinespec.ArtifactList {
371+ "dataset" : {
372+ Artifacts : []* pipelinespec.RuntimeArtifact {
373+ {
374+ Name : "data" ,
375+ Uri : "gs://bucket-a/dataset/v1" ,
376+ },
377+ },
378+ },
379+ },
380+ }
381+
382+ inputsWithURIB := & pipelinespec.ExecutorInput_Inputs {
383+ Artifacts : map [string ]* pipelinespec.ArtifactList {
384+ "dataset" : {
385+ Artifacts : []* pipelinespec.RuntimeArtifact {
386+ {
387+ Name : "data" , // Same name as above
388+ Uri : "gs://bucket-b/dataset/v2" , // Different URI
389+ },
390+ },
391+ },
392+ },
393+ }
394+
395+ inputsWithSameURI := & pipelinespec.ExecutorInput_Inputs {
396+ Artifacts : map [string ]* pipelinespec.ArtifactList {
397+ "dataset" : {
398+ Artifacts : []* pipelinespec.RuntimeArtifact {
399+ {
400+ Name : "data" ,
401+ Uri : "gs://bucket-a/dataset/v1" , // Same URI as inputsWithURIA
402+ },
403+ },
404+ },
405+ },
406+ }
407+
408+ outputs := & pipelinespec.ExecutorInput_Outputs {
409+ Parameters : map [string ]* pipelinespec.ExecutorInput_OutputParameter {
410+ "output" : {OutputFile : "/tmp/output" },
411+ },
412+ }
413+ outputParamTypes := map [string ]string {"output" : "STRING" }
414+
415+ // Generate cache keys
416+ cacheKeyA , err := cacheClient .GenerateCacheKey (inputsWithURIA , outputs , outputParamTypes , []string {"echo" }, "python:3.11" , nil )
417+ require .NoError (t , err )
418+
419+ cacheKeyB , err := cacheClient .GenerateCacheKey (inputsWithURIB , outputs , outputParamTypes , []string {"echo" }, "python:3.11" , nil )
420+ require .NoError (t , err )
421+
422+ cacheKeySameAsA , err := cacheClient .GenerateCacheKey (inputsWithSameURI , outputs , outputParamTypes , []string {"echo" }, "python:3.11" , nil )
423+ require .NoError (t , err )
424+
425+ // Generate fingerprints
426+ fingerprintA , err := cacheClient .GenerateFingerPrint (cacheKeyA )
427+ require .NoError (t , err )
428+
429+ fingerprintB , err := cacheClient .GenerateFingerPrint (cacheKeyB )
430+ require .NoError (t , err )
431+
432+ fingerprintSameAsA , err := cacheClient .GenerateFingerPrint (cacheKeySameAsA )
433+ require .NoError (t , err )
434+
435+ // CRITICAL ASSERTIONS:
436+ // Different URIs MUST produce different fingerprints to prevent incorrect cache reuse
437+ assert .NotEqual (t , fingerprintA , fingerprintB ,
438+ "CRITICAL BUG: Same artifact name with different URIs produced same fingerprint! " +
439+ "This would cause incorrect cache reuse across different data sources." )
440+
441+ // Same URIs should produce same fingerprints (cache hit is correct)
442+ assert .Equal (t , fingerprintA , fingerprintSameAsA ,
443+ "Same artifact name and URI should produce same fingerprint for correct cache hits." )
444+
445+ // Verify the cache key contains the URI in the artifact identifier
446+ assert .Contains (t , cacheKeyA .InputArtifactNames ["dataset" ].ArtifactNames [0 ], "gs://bucket-a/dataset/v1" ,
447+ "Cache key should contain the artifact URI" )
448+ assert .Contains (t , cacheKeyB .InputArtifactNames ["dataset" ].ArtifactNames [0 ], "gs://bucket-b/dataset/v2" ,
449+ "Cache key should contain the artifact URI" )
450+ }
451+
452+ // TestGenerateCacheKey_ArtifactWithoutURI verifies that artifacts without URIs
453+ // still work correctly (backward compatibility).
454+ func TestGenerateCacheKey_ArtifactWithoutURI (t * testing.T ) {
455+ cacheClient , err := NewClient ("ml-pipeline.kubeflow" , "8887" , false , & tls.Config {})
456+ require .NoError (t , err )
457+
458+ inputsNoURI := & pipelinespec.ExecutorInput_Inputs {
459+ Artifacts : map [string ]* pipelinespec.ArtifactList {
460+ "dataset" : {
461+ Artifacts : []* pipelinespec.RuntimeArtifact {
462+ {
463+ Name : "data" ,
464+ Uri : "" , // No URI
465+ },
466+ },
467+ },
468+ },
469+ }
470+
471+ outputs := & pipelinespec.ExecutorInput_Outputs {}
472+ outputParamTypes := map [string ]string {}
473+
474+ cacheKey , err := cacheClient .GenerateCacheKey (inputsNoURI , outputs , outputParamTypes , []string {"echo" }, "python:3.11" , nil )
475+ require .NoError (t , err )
476+
477+ // When URI is empty, the artifact identifier should just be the name
478+ assert .Equal (t , "data" , cacheKey .InputArtifactNames ["dataset" ].ArtifactNames [0 ],
479+ "Artifact without URI should use just the name as identifier" )
480+ }
481+
356482func TestGenerateFingerPrint_ConsidersPVCNames (t * testing.T ) {
357483 base := & cachekey.CacheKey {
358484 InputArtifactNames : map [string ]* cachekey.ArtifactNameList {
0 commit comments