Skip to content

Commit 3130dda

Browse files
authored
huber loss implementation (#127)
1 parent ce6cb99 commit 3130dda

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

losses/losses.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,60 @@ func SPG(logPropActions []mat.Tensor, logProbTargets []mat.Tensor) mat.Tensor {
155155
}
156156
return ag.Neg(loss)
157157
}
158+
159+
// Huber measures the Huber loss between each element in the input x and target y, controlled by
160+
// the threshold (delta). Below the threshold, it behaves like MSE; above it, it becomes linear
161+
// in order to reduce the effect of outliers. If reduceMean is true, it returns the average loss;
162+
// otherwise it returns the sum of the losses.
163+
//
164+
// Huber(d) = { 0.5 * (d^2) if |d| ≤ δ
165+
//
166+
// δ * (|d| - 0.5 * δ) otherwise }
167+
//
168+
// Here, d = x - y.
169+
func Huber(x, y mat.Tensor, delta float64, reduceMean bool) mat.Tensor {
170+
// 1) Compute d = x - y, then |d|
171+
d := ag.Sub(x, y)
172+
absD := ag.Abs(d)
173+
174+
// 2) Build a scalar tensor from 'delta'
175+
// then multiply it by the shape of absD (via OnesLike) to broadcast
176+
// the scalar across all elements. This avoids dimension mismatch with Min().
177+
deltaMat := x.Value().(mat.Matrix).NewScalar(delta)
178+
deltaVec := ag.ProdScalar(x.Value().(mat.Matrix).OnesLike(), deltaMat)
179+
180+
// 3) clipped = min(|d|, deltaVec)
181+
clipped := ag.Min(absD, deltaVec)
182+
183+
// 4) 0.5 * (clipped)^2
184+
halfSqr := ag.ProdScalar(ag.Square(clipped), x.Value().(mat.Matrix).NewScalar(0.5))
185+
186+
// 5) deltaVec * (|d| - clipped)
187+
linear := ag.Prod(deltaVec, ag.Sub(absD, clipped))
188+
189+
// 6) Combine
190+
loss := ag.Add(halfSqr, linear)
191+
192+
// 7) reduceMean or sum
193+
if reduceMean {
194+
return ag.ReduceMean(loss)
195+
}
196+
return ag.ReduceSum(loss)
197+
}
198+
199+
// HuberSeq calculates the Huber loss on multiple (predicted, target) pairs.
200+
// It sums the Huber loss across the entire sequence, optionally averaging it
201+
// by the number of elements if reduceMean is true.
202+
func HuberSeq(predicted, target []mat.Tensor, delta float64, reduceMean bool) mat.Tensor {
203+
// Accumulate the Huber loss across the sequence
204+
loss := Huber(predicted[0], target[0], delta, false)
205+
for i := 1; i < len(predicted); i++ {
206+
loss = ag.Add(loss, Huber(predicted[i], target[i], delta, false))
207+
}
208+
209+
// Optionally divide by length to get mean
210+
if reduceMean {
211+
return ag.DivScalar(loss, loss.Value().(mat.Matrix).NewScalar(float64(len(predicted))))
212+
}
213+
return loss
214+
}

losses/losses_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,41 @@ func assertScalarEqualApprox[T float.DType](t *testing.T, expected T, actual mat
190190
v := float.ValueOf[T](actual.Item())
191191
assert.InDelta(t, expected, v, 1.0e-06)
192192
}
193+
194+
func TestHuberLoss(t *testing.T) {
195+
t.Run("float32", func(t *testing.T) { testHuberLoss[float32](t, 1.0e-6) })
196+
t.Run("float64", func(t *testing.T) { testHuberLoss[float64](t, 1.0e-12) })
197+
}
198+
199+
func testHuberLoss[T float.DType](t *testing.T, tol T) {
200+
// 1) Setup input, target, delta
201+
x := mat.NewDense[T](mat.WithBacking([]T{0.0, 2.5, 4.0}), mat.WithGrad(true))
202+
y := mat.NewDense[T](mat.WithBacking([]T{0.0, 1.0, 2.0}))
203+
delta := 1.0
204+
205+
// 2) Compute Huber loss with reduceMean = false
206+
loss := Huber(x, y, delta, false)
207+
208+
// Sum of the "Huber" terms across 3 elements => ~2.5
209+
assert.InDelta(t, 2.5, loss.Value().Item().F64(), float64(tol))
210+
211+
// 3) Backward
212+
ag.Backward(loss)
213+
214+
// For this example:
215+
// d = [0, 1.5, 2.0]; gradient = sign(d)*delta if |d|>delta else d
216+
// => [0, 1.0, 1.0]
217+
assert.InDeltaSlice(t, []T{0.0, 1.0, 1.0}, x.Grad().Data(), float64(tol))
218+
219+
// 4) Test again with reduceMean = true
220+
x2 := mat.NewDense[T](mat.WithBacking([]T{0.0, 2.5, 4.0}), mat.WithGrad(true))
221+
y2 := mat.NewDense[T](mat.WithBacking([]T{0.0, 1.0, 2.0}))
222+
loss2 := Huber(x2, y2, delta, true)
223+
224+
// The total is 2.5 for 3 elements => 2.5 / 3 = ~0.8333
225+
assert.InDelta(t, 0.8333333333333333, loss2.Value().Item().F64(), float64(tol))
226+
227+
ag.Backward(loss2)
228+
// The gradient is the same shape but divided by 3 => [0, ~0.3333, ~0.3333]
229+
assert.InDeltaSlice(t, []T{0.0, 0.3333333333333333, 0.3333333333333333}, x2.Grad().Data(), float64(tol))
230+
}

0 commit comments

Comments
 (0)