@@ -866,6 +866,100 @@ def _infer_result_type(
866866 transform : ptransform .PTransform ,
867867 inputs : Sequence [Union [pvalue .PBegin , pvalue .PCollection ]],
868868 result_pcollection : Union [pvalue .PValue , pvalue .DoOutputsTuple ]) -> None :
869+ """Infer and set the output element type for a PCollection.
870+
871+ This function determines the output types of transforms by combining:
872+ 1. Concrete input types from previous transforms
873+ 2. Type hints declared on the current transform
874+ 3. Type variable binding and substitution
875+
876+ TYPE VARIABLE BINDING
877+ ---------------------
878+ Type variables (K, V, T, etc.) act as placeholders that get bound to
879+ concrete types through pattern matching. This requires both an input
880+ pattern and an output template:
881+
882+ Input Pattern (from .with_input_types()):
883+ Defines where in the input to find each type variable
884+ Example: Tuple[K, V] means "K is the first element, V is the second"
885+
886+ Output Template (from .with_output_types()):
887+ Defines how to use the bound variables in the output
888+ Example: Tuple[V, K] means "swap the positions"
889+
890+ CONCRETE TYPES VS TYPE VARIABLES
891+ ---------------------------------
892+ The system handles these differently:
893+
894+ Concrete Types (e.g., str, int, Tuple[str, int]):
895+ - Used as-is without any binding
896+ - Do not fall back to Any
897+ - Example: .with_output_types(Tuple[str, int]) → Tuple[str, int]
898+
899+ Type Variables (e.g., K, V, T):
900+ - Must be bound through pattern matching
901+ - Require .with_input_types() to provide the pattern
902+ - Fall back to Any if not bound
903+ - Example without pattern: Tuple[K, V] → Tuple[Any, Any]
904+ - Example with pattern: Tuple[K, V] → Tuple[str, int]
905+
906+ BINDING ALGORITHM
907+ -----------------
908+ 1. Match: Compare input pattern to concrete input
909+ Pattern: Tuple[K, V]
910+ Concrete: Tuple[str, int]
911+ Result: {K: str, V: int} ← Bindings created
912+
913+ 2. Substitute: Apply bindings to output template
914+ Template: Tuple[V, K] ← Note: swapped!
915+ Bindings: {K: str, V: int}
916+ Result: Tuple[int, str] ← Swapped concrete types
917+
918+ Each transform operates in its own type inference scope. Type variables
919+ declared in a parent composite transform do NOT automatically propagate
920+ to child transforms.
921+
922+ Parent scope (composite):
923+ @with_input_types(Tuple[K, V]) ← K, V defined here
924+ class MyComposite(PTransform):
925+ def expand(self, pcoll):
926+ # Child scope - parent's K, V are NOT available
927+ return pcoll | ChildTransform()
928+
929+ Type variables that remain unbound after inference fall back to Any.
930+
931+ EXAMPLES
932+ --------
933+ Example 1: Concrete types (no variables)
934+ Input: Tuple[str, int]
935+ Transform: .with_output_types(Tuple[str, int])
936+ Output: Tuple[str, int] ← Used as-is
937+
938+ Example 2: Type variables with pattern (correct)
939+ Input: Tuple[str, int]
940+ Transform: .with_input_types(Tuple[K, V])
941+ .with_output_types(Tuple[V, K])
942+ Binding: {K: str, V: int}
943+ Output: Tuple[int, str] ← Swapped!
944+
945+ Example 3: Type variables without pattern (falls back to Any)
946+ Input: Tuple[str, int]
947+ Transform: .with_output_types(Tuple[K, V]) ← No input pattern!
948+ Binding: (none - there is no input pattern to match against)
949+ Output: Tuple[Any, Any] ← Fallback
950+
951+ Example 4: Mixed concrete and variables
952+ Input: Tuple[str, int]
953+ Transform: .with_input_types(Tuple[str, V])
954+ .with_output_types(Tuple[str, V])
955+ Binding: {V: int} ← Only V needs binding
956+ Output: Tuple[str, int] ← str passed through, V bound to int
957+
958+ Args:
959+ transform: The PTransform being applied
960+ inputs: Input PCollections (provides concrete types)
961+ result_pcollection: Output PCollection to set type on
962+ """
869963 # TODO(robertwb): Multi-input inference.
870964 type_options = self ._options .view_as (TypeOptions )
871965 if type_options is None or not type_options .pipeline_type_check :
@@ -881,6 +975,7 @@ def _infer_result_type(
881975 else typehints .Union [input_element_types_tuple ])
882976 type_hints = transform .get_type_hints ()
883977 declared_output_type = type_hints .simple_output_type (transform .label )
978+
884979 if declared_output_type :
885980 input_types = type_hints .input_types
886981 if input_types and input_types [0 ]:
@@ -893,6 +988,7 @@ def _infer_result_type(
893988 result_element_type = declared_output_type
894989 else :
895990 result_element_type = transform .infer_output_type (input_element_type )
991+
896992 # Any remaining type variables have no bindings higher than this scope.
897993 result_pcollection .element_type = typehints .bind_type_variables (
898994 result_element_type , {'*' : typehints .Any })
0 commit comments