@@ -155,3 +155,60 @@ func SPG(logPropActions []mat.Tensor, logProbTargets []mat.Tensor) mat.Tensor {
155155 }
156156 return ag .Neg (loss )
157157}
158+
159+ // Huber measures the Huber loss between each element in the input x and target y, controlled by
160+ // the threshold (delta). Below the threshold, it behaves like MSE; above it, it becomes linear
161+ // in order to reduce the effect of outliers. If reduceMean is true, it returns the average loss;
162+ // otherwise it returns the sum of the losses.
163+ //
164+ // Huber(d) = { 0.5 * (d^2) if |d| ≤ δ
165+ //
166+ // δ * (|d| - 0.5 * δ) otherwise }
167+ //
168+ // Here, d = x - y.
169+ func Huber (x , y mat.Tensor , delta float64 , reduceMean bool ) mat.Tensor {
170+ // 1) Compute d = x - y, then |d|
171+ d := ag .Sub (x , y )
172+ absD := ag .Abs (d )
173+
174+ // 2) Build a scalar tensor from 'delta'
175+ // then multiply it by the shape of absD (via OnesLike) to broadcast
176+ // the scalar across all elements. This avoids dimension mismatch with Min().
177+ deltaMat := x .Value ().(mat.Matrix ).NewScalar (delta )
178+ deltaVec := ag .ProdScalar (x .Value ().(mat.Matrix ).OnesLike (), deltaMat )
179+
180+ // 3) clipped = min(|d|, deltaVec)
181+ clipped := ag .Min (absD , deltaVec )
182+
183+ // 4) 0.5 * (clipped)^2
184+ halfSqr := ag .ProdScalar (ag .Square (clipped ), x .Value ().(mat.Matrix ).NewScalar (0.5 ))
185+
186+ // 5) deltaVec * (|d| - clipped)
187+ linear := ag .Prod (deltaVec , ag .Sub (absD , clipped ))
188+
189+ // 6) Combine
190+ loss := ag .Add (halfSqr , linear )
191+
192+ // 7) reduceMean or sum
193+ if reduceMean {
194+ return ag .ReduceMean (loss )
195+ }
196+ return ag .ReduceSum (loss )
197+ }
198+
199+ // HuberSeq calculates the Huber loss on multiple (predicted, target) pairs.
200+ // It sums the Huber loss across the entire sequence, optionally averaging it
201+ // by the number of elements if reduceMean is true.
202+ func HuberSeq (predicted , target []mat.Tensor , delta float64 , reduceMean bool ) mat.Tensor {
203+ // Accumulate the Huber loss across the sequence
204+ loss := Huber (predicted [0 ], target [0 ], delta , false )
205+ for i := 1 ; i < len (predicted ); i ++ {
206+ loss = ag .Add (loss , Huber (predicted [i ], target [i ], delta , false ))
207+ }
208+
209+ // Optionally divide by length to get mean
210+ if reduceMean {
211+ return ag .DivScalar (loss , loss .Value ().(mat.Matrix ).NewScalar (float64 (len (predicted ))))
212+ }
213+ return loss
214+ }
0 commit comments