From 0b9a5e0aabda1f2606e4907c5c03e05bd6fe4078 Mon Sep 17 00:00:00 2001
From: Pasukhin Dmitry <dpasukhi@opencascade.com>
Date: Thu, 25 Dec 2025 22:56:22 +0000
Subject: [PATCH] Foundation Classes - Optimize PLib polynomial evaluation
 (#953)

- Replaced `memcpy`/`memset` calls with direct initialization loops to enable register allocation for small dimensions (1-15)
- Introduced local stack arrays that reduce memory round-trips by writing to output only at the end of computation
- Fused derivative and value update loops to minimize redundant memory reads
---
 src/FoundationClasses/TKMath/PLib/PLib.cxx | 515 ++++++++++-----------
 1 file changed, 246 insertions(+), 269 deletions(-)
diff --git a/src/FoundationClasses/TKMath/PLib/PLib.cxx b/src/FoundationClasses/TKMath/PLib/PLib.cxx
index da9d8df4a1..671d45f556 100644
--- a/src/FoundationClasses/TKMath/PLib/PLib.cxx
+++ b/src/FoundationClasses/TKMath/PLib/PLib.cxx
@@ -24,6 +24,8 @@
 #include <NCollection_LocalArray.hxx>
 #include <Standard_ConstructionError.hxx>
 
+#include <array>
+
 // To convert points array into Real ..
 // *********************************
 //=================================================================================================
@@ -701,100 +703,240 @@ void PLib::RationalDerivatives(const Standard_Integer DerivativeRequest,
 
 //=======================================================================
 // Auxiliary template functions used for optimized evaluation of polynome
-// and its derivatives for smaller dimensions of the polynome
+// and its derivatives for smaller dimensions of the polynome.
+// Uses local arrays to enable register allocation and avoids memcpy/memset.
 //=======================================================================
 
 namespace
 {
-// recursive template for evaluating value or first derivative
+// Maximum dimension for optimized template dispatch
+constexpr int THE_MAX_OPT_DIM = 15;
+
+// Evaluation of only value using local array (register-friendly).
+// Writes to output only at the end.
 template <int dim>
-inline void eval_step1(double* poly, double par, const double* coef)
+inline void eval_poly0(double* theResult, const double* theCoeffs, int theDegree, double thePar)
 {
-  eval_step1<dim - 1>(poly, par, coef);
-  poly[dim] = poly[dim] * par + coef[dim];
-}
+  std::array<double, dim> aLocal;
+  for (int i = 0; i < dim; ++i)
+  {
+    aLocal[i] = theCoeffs[i];
+  }
 
-// recursion end
-template <>
-inline void eval_step1<-1>(double*, double, const double*)
-{
-}
-
-// recursive template for evaluating second derivative
-template <int dim>
-inline void eval_step2(double* poly, double par, const double* coef)
-{
-  eval_step2<dim - 1>(poly, par, coef);
-  poly[dim] = poly[dim] * par + coef[dim] * 2.;
-}
-
-// recursion end
-template <>
-inline void eval_step2<-1>(double*, double, const double*)
-{
-}
-
-// evaluation of only value
-template <int dim>
-inline void eval_poly0(double* aRes, const double* theCoeffs, int Degree, double Par)
-{
-  Standard_Real*       aRes0   = aRes;
-  const Standard_Real* aCoeffs = theCoeffs;
-  memcpy(aRes0, aCoeffs, sizeof(Standard_Real) * dim);
-
-  for (Standard_Integer aDeg = 0; aDeg < Degree; aDeg++)
+  const double* aCoeffs = theCoeffs;
+  for (int aDeg = 0; aDeg < theDegree; ++aDeg)
   {
     aCoeffs -= dim;
-    // Calculating the value of the polynomial
-    eval_step1<dim - 1>(aRes0, Par, aCoeffs);
+    for (int i = 0; i < dim; ++i)
+    {
+      aLocal[i] = aLocal[i] * thePar + aCoeffs[i];
+    }
+  }
+
+  for (int i = 0; i < dim; ++i)
+  {
+    theResult[i] = aLocal[i];
   }
 }
 
-// evaluation of value and first derivative
+// Evaluation of value and first derivative using local arrays.
+// Fuses derivative and value loops to read aLocal0 only once per iteration.
 template <int dim>
-inline void eval_poly1(double* aRes, const double* theCoeffs, int Degree, double Par)
+inline void eval_poly1(double* theResult, const double* theCoeffs, int theDegree, double thePar)
 {
-  Standard_Real*       aRes0   = aRes;
-  Standard_Real*       aRes1   = aRes + dim;
-  const Standard_Real* aCoeffs = theCoeffs;
+  std::array<double, dim> aLocal0;
+  std::array<double, dim> aLocal1;
 
-  memcpy(aRes0, aCoeffs, sizeof(Standard_Real) * dim);
-  memset(aRes1, 0, sizeof(Standard_Real) * dim);
+  for (int i = 0; i < dim; ++i)
+  {
+    aLocal0[i] = theCoeffs[i];
+    aLocal1[i] = 0.0;
+  }
 
-  for (Standard_Integer aDeg = 0; aDeg < Degree; aDeg++)
+  const double* aCoeffs = theCoeffs;
+  for (int aDeg = 0; aDeg < theDegree; ++aDeg)
   {
     aCoeffs -= dim;
-    // Calculating derivatives of the polynomial
-    eval_step1<dim - 1>(aRes1, Par, aRes0);
-    // Calculating the value of the polynomial
-    eval_step1<dim - 1>(aRes0, Par, aCoeffs);
+    for (int i = 0; i < dim; ++i)
+    {
+      const double aVal = aLocal0[i];
+      aLocal1[i]        = aLocal1[i] * thePar + aVal;
+      aLocal0[i]        = aVal * thePar + aCoeffs[i];
+    }
+  }
+
+  for (int i = 0; i < dim; ++i)
+  {
+    theResult[i]       = aLocal0[i];
+    theResult[dim + i] = aLocal1[i];
   }
 }
 
-// evaluation of value and first and second derivatives
+// Evaluation of value and first and second derivatives using local arrays.
+// Fuses all three loops for better cache utilization.
 template <int dim>
-inline void eval_poly2(double* aRes, const double* theCoeffs, int Degree, double Par)
+inline void eval_poly2(double* theResult, const double* theCoeffs, int theDegree, double thePar)
 {
-  Standard_Real*       aRes0   = aRes;
-  Standard_Real*       aRes1   = aRes + dim;
-  Standard_Real*       aRes2   = aRes + 2 * dim;
-  const Standard_Real* aCoeffs = theCoeffs;
+  std::array<double, dim> aLocal0;
+  std::array<double, dim> aLocal1;
+  std::array<double, dim> aLocal2;
 
-  memcpy(aRes0, aCoeffs, sizeof(Standard_Real) * dim);
-  memset(aRes1, 0, sizeof(Standard_Real) * dim);
-  memset(aRes2, 0, sizeof(Standard_Real) * dim);
+  for (int i = 0; i < dim; ++i)
+  {
+    aLocal0[i] = theCoeffs[i];
+    aLocal1[i] = 0.0;
+    aLocal2[i] = 0.0;
+  }
 
-  for (Standard_Integer aDeg = 0; aDeg < Degree; aDeg++)
+  const double* aCoeffs = theCoeffs;
+  for (int aDeg = 0; aDeg < theDegree; ++aDeg)
   {
     aCoeffs -= dim;
-    // Calculating second derivatives of the polynomial
-    eval_step2<dim - 1>(aRes2, Par, aRes1);
-    // Calculating derivatives of the polynomial
-    eval_step1<dim - 1>(aRes1, Par, aRes0);
-    // Calculating the value of the polynomial
-    eval_step1<dim - 1>(aRes0, Par, aCoeffs);
+    for (int i = 0; i < dim; ++i)
+    {
+      const double aD1  = aLocal1[i];
+      const double aVal = aLocal0[i];
+      aLocal2[i]        = aLocal2[i] * thePar + aD1 * 2.0;
+      aLocal1[i]        = aD1 * thePar + aVal;
+      aLocal0[i]        = aVal * thePar + aCoeffs[i];
+    }
+  }
+
+  for (int i = 0; i < dim; ++i)
+  {
+    theResult[i]           = aLocal0[i];
+    theResult[dim + i]     = aLocal1[i];
+    theResult[2 * dim + i] = aLocal2[i];
   }
 }
+
+// Runtime evaluation for dimension > THE_MAX_OPT_DIM (value only)
+inline void eval_poly0_runtime(double*       theResult,
+                               const double* theCoeffs,
+                               int           theDegree,
+                               double        thePar,
+                               int           theDimension)
+{
+  for (int i = 0; i < theDimension; ++i)
+  {
+    theResult[i] = theCoeffs[i];
+  }
+
+  const double* aCoeffs = theCoeffs;
+  for (int aDeg = 0; aDeg < theDegree; ++aDeg)
+  {
+    aCoeffs -= theDimension;
+    for (int i = 0; i < theDimension; ++i)
+    {
+      theResult[i] = theResult[i] * thePar + aCoeffs[i];
+    }
+  }
+}
+
+// Runtime evaluation for dimension > THE_MAX_OPT_DIM (value + 1st derivative)
+inline void eval_poly1_runtime(double*       theResult,
+                               const double* theCoeffs,
+                               int           theDegree,
+                               double        thePar,
+                               int           theDimension)
+{
+  double* aRes0 = theResult;
+  double* aRes1 = theResult + theDimension;
+
+  for (int i = 0; i < theDimension; ++i)
+  {
+    aRes0[i] = theCoeffs[i];
+    aRes1[i] = 0.0;
+  }
+
+  const double* aCoeffs = theCoeffs;
+  for (int aDeg = 0; aDeg < theDegree; ++aDeg)
+  {
+    aCoeffs -= theDimension;
+    for (int i = 0; i < theDimension; ++i)
+    {
+      const double aVal = aRes0[i];
+      aRes1[i]          = aRes1[i] * thePar + aVal;
+      aRes0[i]          = aVal * thePar + aCoeffs[i];
+    }
+  }
+}
+
+// Runtime evaluation for dimension > THE_MAX_OPT_DIM (value + 1st + 2nd derivatives)
+inline void eval_poly2_runtime(double*       theResult,
+                               const double* theCoeffs,
+                               int           theDegree,
+                               double        thePar,
+                               int           theDimension)
+{
+  double* aRes0 = theResult;
+  double* aRes1 = theResult + theDimension;
+  double* aRes2 = theResult + 2 * theDimension;
+
+  for (int i = 0; i < theDimension; ++i)
+  {
+    aRes0[i] = theCoeffs[i];
+    aRes1[i] = 0.0;
+    aRes2[i] = 0.0;
+  }
+
+  const double* aCoeffs = theCoeffs;
+  for (int aDeg = 0; aDeg < theDegree; ++aDeg)
+  {
+    aCoeffs -= theDimension;
+    for (int i = 0; i < theDimension; ++i)
+    {
+      const double aD1  = aRes1[i];
+      const double aVal = aRes0[i];
+      aRes2[i]          = aRes2[i] * thePar + aD1 * 2.0;
+      aRes1[i]          = aD1 * thePar + aVal;
+      aRes0[i]          = aVal * thePar + aCoeffs[i];
+    }
+  }
+}
+
+// Function pointer type for dispatch tables
+using EvalPolyFunc = void (*)(double*, const double*, int, double);
+
+// Helper to generate dispatch tables at compile time
+template <template <int> class EvalFunc, typename FuncPtr, int... Is>
+constexpr std::array<FuncPtr, sizeof...(Is)> makeDispatchTable(std::integer_sequence<int, Is...>)
+{
+  return {{&EvalFunc<Is + 1>::call...}};
+}
+
+// Wrapper structs to allow template parameter deduction
+template <int Dim>
+struct EvalPoly0Wrapper
+{
+  static void call(double* r, const double* c, int d, double p) { eval_poly0<Dim>(r, c, d, p); }
+};
+
+template <int Dim>
+struct EvalPoly1Wrapper
+{
+  static void call(double* r, const double* c, int d, double p) { eval_poly1<Dim>(r, c, d, p); }
+};
+
+template <int Dim>
+struct EvalPoly2Wrapper
+{
+  static void call(double* r, const double* c, int d, double p) { eval_poly2<Dim>(r, c, d, p); }
+};
+
+// Dispatch tables for dimensions 1..15
+constexpr std::array<EvalPolyFunc, THE_MAX_OPT_DIM> THE_EVAL_POLY0_TABLE =
+  makeDispatchTable<EvalPoly0Wrapper, EvalPolyFunc>(
+    std::make_integer_sequence<int, THE_MAX_OPT_DIM>{});
+
+constexpr std::array<EvalPolyFunc, THE_MAX_OPT_DIM> THE_EVAL_POLY1_TABLE =
+  makeDispatchTable<EvalPoly1Wrapper, EvalPolyFunc>(
+    std::make_integer_sequence<int, THE_MAX_OPT_DIM>{});
+
+constexpr std::array<EvalPolyFunc, THE_MAX_OPT_DIM> THE_EVAL_POLY2_TABLE =
+  makeDispatchTable<EvalPoly2Wrapper, EvalPolyFunc>(
+    std::make_integer_sequence<int, THE_MAX_OPT_DIM>{});
+
 } // namespace
 
 //=======================================================================
@@ -828,173 +970,57 @@ void PLib::EvalPolynomial(const Standard_Real    Par,
 {
   const Standard_Real* aCoeffs = &PolynomialCoeff + Degree * Dimension;
   Standard_Real*       aRes    = &Results;
-  Standard_Real*       anOriginal;
-  Standard_Integer     ind = 0;
+
   switch (DerivativeRequest)
   {
     case 1: {
-      switch (Dimension)
+      if (Dimension >= 1 && Dimension <= THE_MAX_OPT_DIM)
       {
-        case 1:
-          eval_poly1<1>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 2:
-          eval_poly1<2>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 3:
-          eval_poly1<3>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 4:
-          eval_poly1<4>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 5:
-          eval_poly1<5>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 6:
-          eval_poly1<6>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 7:
-          eval_poly1<7>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 8:
-          eval_poly1<8>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 9:
-          eval_poly1<9>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 10:
-          eval_poly1<10>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 11:
-          eval_poly1<11>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 12:
-          eval_poly1<12>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 13:
-          eval_poly1<13>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 14:
-          eval_poly1<14>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 15:
-          eval_poly1<15>(aRes, aCoeffs, Degree, Par);
-          break;
-        default: {
-          Standard_Real* aRes0 = aRes;
-          Standard_Real* aRes1 = aRes + Dimension;
-
-          memcpy(aRes0, aCoeffs, sizeof(Standard_Real) * Dimension);
-          memset(aRes1, 0, sizeof(Standard_Real) * Dimension);
-
-          for (Standard_Integer aDeg = 0; aDeg < Degree; aDeg++)
-          {
-            aCoeffs -= Dimension;
-            // Calculating derivatives of the polynomial
-            for (ind = 0; ind < Dimension; ind++)
-              aRes1[ind] = aRes1[ind] * Par + aRes0[ind];
-            // Calculating the value of the polynomial
-            for (ind = 0; ind < Dimension; ind++)
-              aRes0[ind] = aRes0[ind] * Par + aCoeffs[ind];
-          }
-        }
+        THE_EVAL_POLY1_TABLE[Dimension - 1](aRes, aCoeffs, Degree, Par);
+      }
+      else
+      {
+        eval_poly1_runtime(aRes, aCoeffs, Degree, Par, Dimension);
       }
       break;
     }
     case 2: {
-      switch (Dimension)
+      if (Dimension >= 1 && Dimension <= THE_MAX_OPT_DIM)
       {
-        case 1:
-          eval_poly2<1>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 2:
-          eval_poly2<2>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 3:
-          eval_poly2<3>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 4:
-          eval_poly2<4>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 5:
-          eval_poly2<5>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 6:
-          eval_poly2<6>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 7:
-          eval_poly2<7>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 8:
-          eval_poly2<8>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 9:
-          eval_poly2<9>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 10:
-          eval_poly2<10>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 11:
-          eval_poly2<11>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 12:
-          eval_poly2<12>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 13:
-          eval_poly2<13>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 14:
-          eval_poly2<14>(aRes, aCoeffs, Degree, Par);
-          break;
-        case 15:
-          eval_poly2<15>(aRes, aCoeffs, Degree, Par);
-          break;
-        default: {
-          Standard_Real* aRes0 = aRes;
-          Standard_Real* aRes1 = aRes + Dimension;
-          Standard_Real* aRes2 = aRes1 + Dimension;
-
-          // Nullify the results
-          Standard_Integer aSize = 2 * Dimension;
-          memcpy(aRes, aCoeffs, sizeof(Standard_Real) * Dimension);
-          memset(aRes1, 0, sizeof(Standard_Real) * aSize);
-
-          for (Standard_Integer aDeg = 0; aDeg < Degree; aDeg++)
-          {
-            aCoeffs -= Dimension;
-            // Calculating derivatives of the polynomial
-            for (ind = 0; ind < Dimension; ind++)
-              aRes2[ind] = aRes2[ind] * Par + aRes1[ind] * 2.0;
-            for (ind = 0; ind < Dimension; ind++)
-              aRes1[ind] = aRes1[ind] * Par + aRes0[ind];
-            // Calculating the value of the polynomial
-            for (ind = 0; ind < Dimension; ind++)
-              aRes0[ind] = aRes0[ind] * Par + aCoeffs[ind];
-          }
-          break;
-        }
+        THE_EVAL_POLY2_TABLE[Dimension - 1](aRes, aCoeffs, Degree, Par);
+      }
+      else
+      {
+        eval_poly2_runtime(aRes, aCoeffs, Degree, Par, Dimension);
       }
       break;
     }
     default: {
-      // Nullify the results
-      Standard_Integer aResSize = (1 + DerivativeRequest) * Dimension;
-      memset(aRes, 0, sizeof(Standard_Real) * aResSize);
-
-      for (Standard_Integer aDeg = 0; aDeg <= Degree; aDeg++)
+      // General case for DerivativeRequest > 2
+      const int aResSize = (1 + DerivativeRequest) * Dimension;
+      for (int i = 0; i < aResSize; ++i)
       {
-        aRes = &Results + aResSize - Dimension;
+        aRes[i] = 0.0;
+      }
+
+      for (int aDeg = 0; aDeg <= Degree; ++aDeg)
+      {
+        Standard_Real* aPtr = aRes + aResSize - Dimension;
         // Calculating derivatives of the polynomial
-        for (Standard_Integer aDeriv = DerivativeRequest; aDeriv > 0; aDeriv--)
+        for (int aDeriv = DerivativeRequest; aDeriv > 0; --aDeriv)
         {
-          anOriginal = aRes - Dimension; // pointer to the derivative minus 1
-          for (ind = 0; ind < Dimension; ind++)
-            aRes[ind] = aRes[ind] * Par + anOriginal[ind] * aDeriv;
-          aRes = anOriginal;
+          Standard_Real* anOriginal = aPtr - Dimension;
+          for (int ind = 0; ind < Dimension; ++ind)
+          {
+            aPtr[ind] = aPtr[ind] * Par + anOriginal[ind] * aDeriv;
+          }
+          aPtr = anOriginal;
         }
         // Calculating the value of the polynomial
-        for (ind = 0; ind < Dimension; ind++)
-          aRes[ind] = aRes[ind] * Par + aCoeffs[ind];
+        for (int ind = 0; ind < Dimension; ++ind)
+        {
+          aPtr[ind] = aPtr[ind] * Par + aCoeffs[ind];
+        }
         aCoeffs -= Dimension;
       }
     }
@@ -1013,62 +1039,13 @@ void PLib::NoDerivativeEvalPolynomial(const Standard_Real    Par,
   const Standard_Real* aCoeffs = &PolynomialCoeff + DegreeDimension;
   Standard_Real*       aRes    = &Results;
 
-  switch (Dimension)
+  if (Dimension >= 1 && Dimension <= THE_MAX_OPT_DIM)
   {
-    case 1:
-      eval_poly0<1>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 2:
-      eval_poly0<2>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 3:
-      eval_poly0<3>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 4:
-      eval_poly0<4>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 5:
-      eval_poly0<5>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 6:
-      eval_poly0<6>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 7:
-      eval_poly0<7>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 8:
-      eval_poly0<8>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 9:
-      eval_poly0<9>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 10:
-      eval_poly0<10>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 11:
-      eval_poly0<11>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 12:
-      eval_poly0<12>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 13:
-      eval_poly0<13>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 14:
-      eval_poly0<14>(aRes, aCoeffs, Degree, Par);
-      break;
-    case 15:
-      eval_poly0<15>(aRes, aCoeffs, Degree, Par);
-      break;
-    default: {
-      memcpy(aRes, aCoeffs, sizeof(Standard_Real) * Dimension);
-      for (Standard_Integer aDeg = 0; aDeg < Degree; aDeg++)
-      {
-        aCoeffs -= Dimension;
-        for (Standard_Integer ind = 0; ind < Dimension; ind++)
-          aRes[ind] = aRes[ind] * Par + aCoeffs[ind];
-      }
-    }
+    THE_EVAL_POLY0_TABLE[Dimension - 1](aRes, aCoeffs, Degree, Par);
+  }
+  else
+  {
+    eval_poly0_runtime(aRes, aCoeffs, Degree, Par, Dimension);
   }
 }