@@ -5100,5 +5100,269 @@ void BackwardFunction(Tensor<T> gradient)
51005100
51015101 return node ;
51025102 }
5103+
/// <summary>
/// Performs graph convolution operation for graph neural networks.
/// </summary>
/// <param name="input">Input node features of shape [batch, numNodes, inputFeatures].</param>
/// <param name="adjacency">Adjacency matrix of shape [batch, numNodes, numNodes].</param>
/// <param name="weights">Weight matrix of shape [inputFeatures, outputFeatures].</param>
/// <param name="bias">Optional bias vector of shape [outputFeatures].</param>
/// <returns>Output node features of shape [batch, numNodes, outputFeatures].</returns>
/// <exception cref="ArgumentException">Thrown when any tensor shape is inconsistent with the others.</exception>
/// <remarks>
/// <para>
/// This operation implements graph convolution: output = adjacency @ (input @ weights) + bias.
/// It aggregates features from neighboring nodes according to the graph structure defined by the adjacency matrix.
/// </para>
/// <para>
/// Forward pass:
/// 1. Transform node features: X' = X @ W
/// 2. Aggregate via graph structure: output = A @ X'
/// 3. Add bias: output = output + b
/// </para>
/// <para>
/// Backward pass gradients:
/// - ∂L/∂X = A^T @ (∂L/∂out) @ W^T
/// - ∂L/∂W = X^T @ A^T @ (∂L/∂out)
/// - ∂L/∂b = sum(∂L/∂out) across batch and nodes
/// - ∂L/∂A = (∂L/∂out) @ (X @ W)^T
/// The term A^T @ (∂L/∂out) appears in both the input and weights gradients, so it is
/// computed once into an intermediate tensor instead of being re-derived inside the
/// innermost loops (which would cost O(B·N²·Fin·Fout) instead of O(B·N²·Fout)).
/// </para>
/// <para><b>For Beginners:</b> This operation helps neural networks learn from graph-structured data.
///
/// Think of it like spreading information through a social network:
/// - Each person (node) has certain features
/// - The adjacency matrix shows who is connected to whom
/// - This operation lets each person's features be influenced by their connections
/// - The weights control how features are transformed during this process
/// </para>
/// </remarks>
public static ComputationNode<T> GraphConv(
    ComputationNode<T> input,
    ComputationNode<T> adjacency,
    ComputationNode<T> weights,
    ComputationNode<T>? bias = null)
{
    var numOps = MathHelper.GetNumericOperations<T>();
    var inputShape = input.Value.Shape;
    var adjShape = adjacency.Value.Shape;
    var weightsShape = weights.Value.Shape;

    // Validate shapes before doing any work so failures carry a clear message.
    if (inputShape.Length != 3)
        throw new ArgumentException("Input must be 3D tensor [batch, numNodes, inputFeatures]", nameof(input));
    if (adjShape.Length != 3 || adjShape[1] != adjShape[2])
        throw new ArgumentException("Adjacency must be 3D tensor [batch, numNodes, numNodes]", nameof(adjacency));
    if (weightsShape.Length != 2)
        throw new ArgumentException("Weights must be 2D tensor [inputFeatures, outputFeatures]", nameof(weights));
    if (inputShape[0] != adjShape[0])
        throw new ArgumentException($"Batch size mismatch: input {inputShape[0]} vs adjacency {adjShape[0]}", nameof(adjacency));
    if (inputShape[1] != adjShape[1])
        throw new ArgumentException($"Number of nodes mismatch: input {inputShape[1]} vs adjacency {adjShape[1]}", nameof(adjacency));
    if (inputShape[2] != weightsShape[0])
        throw new ArgumentException($"Feature size mismatch: input features {inputShape[2]} vs weights {weightsShape[0]}", nameof(weights));
    if (bias != null && (bias.Value.Shape.Length != 1 || bias.Value.Shape[0] != weightsShape[1]))
        throw new ArgumentException($"Bias must be 1D tensor with {weightsShape[1]} elements", nameof(bias));

    int batchSize = inputShape[0];
    int numNodes = inputShape[1];
    int inputFeatures = inputShape[2];
    int outputFeatures = weightsShape[1];

    var output = new Tensor<T>([batchSize, numNodes, outputFeatures]);

    // Forward pass: A @ (X @ W) + b
    // Step 1: X' = X @ W (kept alive for the adjacency gradient in the backward pass).
    var xw = new Tensor<T>([batchSize, numNodes, outputFeatures]);
    for (int b = 0; b < batchSize; b++)
    {
        for (int n = 0; n < numNodes; n++)
        {
            for (int outF = 0; outF < outputFeatures; outF++)
            {
                T sum = numOps.Zero;
                for (int inF = 0; inF < inputFeatures; inF++)
                {
                    sum = numOps.Add(sum, numOps.Multiply(
                        input.Value[b, n, inF],
                        weights.Value[inF, outF]));
                }
                xw[b, n, outF] = sum;
            }
        }
    }

    // Step 2: output = A @ X'
    for (int b = 0; b < batchSize; b++)
    {
        for (int i = 0; i < numNodes; i++)
        {
            for (int outF = 0; outF < outputFeatures; outF++)
            {
                T sum = numOps.Zero;
                for (int j = 0; j < numNodes; j++)
                {
                    sum = numOps.Add(sum, numOps.Multiply(
                        adjacency.Value[b, i, j],
                        xw[b, j, outF]));
                }
                output[b, i, outF] = sum;
            }
        }
    }

    // Step 3: broadcast-add the bias across batch and nodes.
    if (bias != null)
    {
        for (int b = 0; b < batchSize; b++)
        {
            for (int n = 0; n < numNodes; n++)
            {
                for (int outF = 0; outF < outputFeatures; outF++)
                {
                    output[b, n, outF] = numOps.Add(output[b, n, outF], bias.Value[outF]);
                }
            }
        }
    }

    // Backward function: distributes the incoming gradient to each parent that wants one.
    void BackwardFunction(Tensor<T> gradient)
    {
        // Accumulates a gradient into a node, initializing on first contribution.
        void Accumulate(ComputationNode<T> target, Tensor<T> grad)
        {
            target.Gradient = target.Gradient == null ? grad : target.Gradient.Add(grad);
        }

        // Shared intermediate: (A^T @ grad)[b, j, outF] = sum_i A[b, i, j] * grad[b, i, outF].
        // Needed by both the input and weights gradients; computing it once turns two
        // five-deep loop nests into cheaper factored products.
        Tensor<T>? atGrad = null;
        if (input.RequiresGradient || weights.RequiresGradient)
        {
            atGrad = new Tensor<T>([batchSize, numNodes, outputFeatures]);
            for (int b = 0; b < batchSize; b++)
            {
                for (int j = 0; j < numNodes; j++)
                {
                    for (int outF = 0; outF < outputFeatures; outF++)
                    {
                        T sum = numOps.Zero;
                        for (int i = 0; i < numNodes; i++)
                        {
                            // A^T[j,i] = A[i,j]
                            sum = numOps.Add(sum, numOps.Multiply(
                                adjacency.Value[b, i, j],
                                gradient[b, i, outF]));
                        }
                        atGrad[b, j, outF] = sum;
                    }
                }
            }
        }

        // Gradient w.r.t. input: (A^T @ grad) @ W^T
        if (input.RequiresGradient)
        {
            var inputGradient = new Tensor<T>(inputShape);

            for (int b = 0; b < batchSize; b++)
            {
                for (int n = 0; n < numNodes; n++)
                {
                    for (int inF = 0; inF < inputFeatures; inF++)
                    {
                        T sum = numOps.Zero;
                        for (int outF = 0; outF < outputFeatures; outF++)
                        {
                            // W^T[outF,inF] = W[inF,outF]
                            sum = numOps.Add(sum, numOps.Multiply(
                                atGrad![b, n, outF],
                                weights.Value[inF, outF]));
                        }
                        inputGradient[b, n, inF] = sum;
                    }
                }
            }

            Accumulate(input, inputGradient);
        }

        // Gradient w.r.t. weights: X^T @ (A^T @ grad)
        if (weights.RequiresGradient)
        {
            var weightsGradient = new Tensor<T>(weightsShape);

            for (int inF = 0; inF < inputFeatures; inF++)
            {
                for (int outF = 0; outF < outputFeatures; outF++)
                {
                    T sum = numOps.Zero;
                    for (int b = 0; b < batchSize; b++)
                    {
                        for (int j = 0; j < numNodes; j++)
                        {
                            sum = numOps.Add(sum, numOps.Multiply(
                                input.Value[b, j, inF],
                                atGrad![b, j, outF]));
                        }
                    }
                    weightsGradient[inF, outF] = sum;
                }
            }

            Accumulate(weights, weightsGradient);
        }

        // Gradient w.r.t. bias: sum of the gradient across batch and nodes
        // (the bias was broadcast over both dimensions in the forward pass).
        if (bias != null && bias.RequiresGradient)
        {
            var biasGradient = new Tensor<T>([outputFeatures]);

            for (int outF = 0; outF < outputFeatures; outF++)
            {
                T sum = numOps.Zero;
                for (int b = 0; b < batchSize; b++)
                {
                    for (int n = 0; n < numNodes; n++)
                    {
                        sum = numOps.Add(sum, gradient[b, n, outF]);
                    }
                }
                biasGradient[outF] = sum;
            }

            Accumulate(bias, biasGradient);
        }

        // Gradient w.r.t. adjacency: grad @ (X @ W)^T, reusing xw from the forward pass.
        if (adjacency.RequiresGradient)
        {
            var adjGradient = new Tensor<T>(adjShape);

            for (int b = 0; b < batchSize; b++)
            {
                for (int i = 0; i < numNodes; i++)
                {
                    for (int j = 0; j < numNodes; j++)
                    {
                        T sum = numOps.Zero;
                        for (int outF = 0; outF < outputFeatures; outF++)
                        {
                            sum = numOps.Add(sum, numOps.Multiply(
                                gradient[b, i, outF],
                                xw[b, j, outF]));
                        }
                        adjGradient[b, i, j] = sum;
                    }
                }
            }

            Accumulate(adjacency, adjGradient);
        }
    }

    var parents = new List<ComputationNode<T>> { input, adjacency, weights };
    if (bias != null) parents.Add(bias);

    var node = new ComputationNode<T>(
        value: output,
        requiresGradient: input.RequiresGradient || adjacency.RequiresGradient || weights.RequiresGradient || (bias?.RequiresGradient ?? false),
        parents: parents,
        backwardFunction: BackwardFunction,
        name: null);

    var tape = GradientTape<T>.Current;
    if (tape != null && tape.IsRecording)
        tape.RecordOperation(node);

    return node;
}
51035367}
51045368}
0 commit comments