/* bbowda - Black Box Optimization With Data Analysis
   Copyright (C) 2006-2012 Kevin Kofler <Kevin@tigcc.ticalc.org>
   Copyright (C) 2025 DAGOPT Optimization Technologies GmbH (www.dagopt.com)
                      written by Kevin Kofler <kofler@dagopt.com>

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version. A copy of the GNU General Public
   License version 3 can be found in the file gpl-3.0.txt.

   Linking bbowda statically or dynamically (directly or indirectly) with
   other modules is making a combined work based on bbowda. Thus, the terms
   and conditions of the GNU General Public License cover the whole
   combination.

   In addition, as a special exception, the copyright holder of bbowda gives
   you permission to combine the bbowda program:
   * with free software programs or libraries that are released under the
     GNU Library or Lesser General Public License (LGPL), either version 2
     of the License, or (at your option) any later version,
   * with free software programs or libraries that are released under the
     IBM Common Public License (CPL), either version 1.0 of the License, or
     (at your option) any later version,
   * with free software programs or libraries that are released under the
     eclipse.org Eclipse Public License (EPL), either version 1.0 of the
     License, or (at your option) any later version,
   * with free software programs or libraries that are released under the
     CeCILL-C Free Software License Agreement, either version 1 of the License,
     or (at your option) any later version,
   * with code included in the standard release of MUMPS under the old MUMPS
     Conditions of Use as reproduced in licenses.txt (or modified versions
     of such code, with unchanged license; variants of the license where only
     the list of contributors and/or the list of suggested citations changed
     shall be considered the same license) and
   * if you qualify for a free of charge license of DONLP2, with code
     included in the standard release of DONLP2 under the DONLP2 Conditions
     of Use as reproduced in licenses.txt (or modified versions of such code,
     with unchanged license).
   (For avoidance of doubt, this implies that it is permitted, e.g., to combine
   the bbowda program with current versions of Ipopt released under the EPL
   version 2.0, because 2.0 is >= 1.0. Its dependency MUMPS is released under
   the CeCILL-C version 1, which is also listed above.)

   You may copy and distribute such a system following the terms of the GNU
   GPL for bbowda and the licenses of the other code concerned, provided that
   you include the source code of that other code when and as the GNU GPL
   requires distribution of source code.

   Note that people who make modified versions of bbowda are not obligated
   to grant this special exception for their modified versions; it is their
   choice whether to do so. The GNU General Public License gives permission
   to release a modified version without this exception; this exception also
   makes it possible to release a modified version which carries forward
   this exception.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

#define _ISOC99_SOURCE
#define _GNU_SOURCE /* qsort_r (not yet in _POSIX_C_SOURCE/_XOPEN_SOURCE, see
                       https://sourceware.org/bugzilla/show_bug.cgi?id=32581) */
#include "covar.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include "eval.h"
#include "xmalloc.h"

#ifdef _WIN32
#define QSORT_R qsort_s
#define QSORT_R_ARGS void *user_data, const void *p, const void *q
#else /* assume POSIX */
#define QSORT_R qsort_r
#define QSORT_R_ARGS const void *p, const void *q, void *user_data
#endif

#ifdef __GNUC__
#define ATTR_UNUSED __attribute__((unused))
#else
#define ATTR_UNUSED /**/
#endif

struct covar_private {
  /* Cholesky factor of the covariance matrix */
  double *L_p;
#define L(priv) ((double(*)[DIMX+DIMZ+DIMY+DIMY_EQ])(priv)->L_p)
  /* Transpose of L, reversed, to avoid cache misses in compute_MX */
  double *LTrev_p;
#define LTrev(priv) ((double(*)[DIMX+DIMZ+DIMY+DIMY_EQ])(priv)->LTrev_p)
  /* Copy of currpts sorted by the norm of the equality constraint violation */
  double *usedpts_p;
#define usedpts(priv) ((double(*)[DIMX+DIMY+DIMY_EQ])(priv)->usedpts_p)
  /* Full number of points in usedpts */
  size_t usedpts_size;
  /* Number of points in usedpts which should actually be used */
  size_t numusedpts;
  /* Whether usedpts_p is an owned copy or a borrowed reference */
  _Bool usedpts_p_owned;
};

/* allocate vectors and matrices for the covariance models */
struct covar * alloc_covar(const struct bbowda_problem *problem)
{
  struct covar *covar = xmalloc(sizeof(struct covar));
  covar->Xbar=xmalloc((size_t)(DIMX+DIMZ+DIMY+DIMY_EQ)*sizeof(double));
  covar->priv=xmalloc(sizeof(struct covar_private));
  covar->priv->L_p=xmalloc((size_t)(DIMX+DIMZ+DIMY+DIMY_EQ)*(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ)*sizeof(double));
  covar->priv->LTrev_p=xmalloc((size_t)(DIMX+DIMZ+DIMY+DIMY_EQ)*(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ)*sizeof(double));
  covar->priv->usedpts_p=NULL;
  covar->priv->usedpts_p_owned=(_Bool)DIMY_EQ;
  return covar;
}

static void for_each_used(struct covar *covar,
                          const struct bbowda_problem *problem,
                          struct eval_points *evalpts,
                          double *acc, double *aux,
                          void (*update)(struct covar *,
                                         const struct bbowda_problem *,
                                         struct eval_points *, const double *,
                                         double *, double *))
{
  size_t j;
  for (j=0; j<covar->priv->numusedpts; j++) {
    update(covar,problem,evalpts,usedpts(covar->priv)[j],acc,aux);
  }
}

/* MX = C^-1 X = (L LT)^-1 X = L^-T L^-1 X */
void compute_MX(struct covar *covar,
                const struct bbowda_problem *problem, const double *X,
                double *MX)
{
  size_t i,j,k;
  /* MX := L^-1 X */
  for (i=0; i<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); i++) {
    MX[i]=X[i];
    for (j=0; j<i; j++) {
      MX[i]-=L(covar->priv)[i][j]*MX[j];
    }
    MX[i]/=L(covar->priv)[i][i];
  }
  /* MX := L^-T X */
  /* try hard to avoid cache misses */
  for (k=0; k<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); k++) {
    j=i;
    double d=LTrev(covar->priv)[k][--i];
    for (; j<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); j++) {
      MX[i]-=LTrev(covar->priv)[k][j]*MX[j];
    }
    MX[i]/=d;
  }
}

/* empirical formula for the weight:
   * our point weights decrease with the 6th power of the distance
   * the worse the point, the less it will be weighted
   weight = (||x-xbest||^2)^(-6)/(1/10+(goodness-best_goodness))^(1/2) */
static double weight_of_point(const struct bbowda_problem *problem,
                              struct eval_points *evalpts, const double *x)
{
  size_t i;
  double goodness=get_point_goodness(evalpts,problem,x), weight=0.;
  for (i=0; i<(size_t)DIMX; i++) {
    double d=evalpts->best_point[i]-x[i];
    weight += d*d;
  }
  if (weight == 0.) return 0.;
  weight = pow(weight,-3.);
  weight /= sqrt(.1+(goodness-evalpts->best_goodness));
  return weight;
}

static void compute_covar(struct covar *covar,
                          const struct bbowda_problem *problem,
                          struct eval_points *evalpts, const double *x,
                          double *Xbar, double *count)
{
  size_t i,j,k,l,a,b;
  double weight=weight_of_point(problem,evalpts,x);
  for (i=0; i<(size_t)DIMX; i++) {
    for (k=0; k<=i; k++) {
      L(covar->priv)[i][k] += weight*(x[i]-Xbar[i])*(x[k]-Xbar[k]);
    }
  }
  for (i=0,a=(size_t)DIMX; i<(size_t)DIMX; i++) {
    for (j=0; j<=i; j++,a++) {
      for (k=0; k<(size_t)DIMX; k++) {
        L(covar->priv)[a][k] += weight*(x[i]*x[j]-Xbar[a])*(x[k]-Xbar[k]);
      }
      for (k=0,b=(size_t)DIMX; b<=a; k++) {
        for (l=0; l<=k && b<=a; l++,b++) {
          L(covar->priv)[a][b] += weight*(x[i]*x[j]-Xbar[a])*(x[k]*x[l]-Xbar[b]);
        }
      }
    }
  }
  for (i=0; i<(size_t)(DIMY+DIMY_EQ); i++) {
    for (k=0; k<(size_t)DIMX; k++) {
      L(covar->priv)[i+DIMX+DIMZ][k] += weight*(x[i+DIMX]-Xbar[i+DIMX+DIMZ])*(x[k]-Xbar[k]);
    }
    for (k=0,b=(size_t)DIMX; k<(size_t)DIMX; k++) {
      for (l=0; l<=k; l++,b++) {
        L(covar->priv)[i+DIMX+DIMZ][b] += weight*(x[i+DIMX]-Xbar[i+DIMX+DIMZ])*(x[k]*x[l]-Xbar[b]);
      }
    }
    for (k=0; k<=i; k++) {
      L(covar->priv)[i+DIMX+DIMZ][k+DIMX+DIMZ] += weight*(x[i+DIMX]-Xbar[i+DIMX+DIMZ])*(x[k+DIMX]-Xbar[k+DIMX+DIMZ]);
    }
  }
  (*count)+=weight;
}

static void compute_kup_klow(struct covar *covar,
                             const struct bbowda_problem *problem,
                             struct eval_points *evalpts, const double *x,
                             double *dmax, double *aux ATTR_UNUSED)
{
  /* compute k = XT M X */
  double X[DIMX+DIMZ+DIMY+DIMY_EQ], MX[DIMX+DIMZ+DIMY+DIMY_EQ], *p=X, k=0., d=0.;
  size_t i,j;
  for (i=0; i<(size_t)DIMX; i++) {
    d += (x[i]-evalpts->best_point[i])*(x[i]-evalpts->best_point[i]);
  }
  if (d>*dmax) return;
  for (i=0; i<(size_t)DIMX; i++) {
    *(p++) = x[i];
  }
  for (i=0; i<(size_t)DIMX; i++) {
    for (j=0; j<=i; j++) {
      *(p++) = x[i]*x[j];
    }
  }
  for (i=0; i<(size_t)(DIMY+DIMY_EQ); i++) {
    *(p++) = x[DIMX+i];
  }
  for (i=0; i<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); i++) {
    X[i] -= covar->Xbar[i];
  }
  compute_MX(covar,problem,X,MX);
  for (i=0; i<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); i++) {
    k += X[i]*MX[i];
  }

  /* update kup, klow */
  if (k>covar->kup) covar->kup=k;
  if (k<covar->klow) covar->klow=k;
}

static int compare_double(const void *p, const void *q)
{
  return (*(double *)p>*(double *)q)-(*(double *)p<*(double *)q);
}

static int compare_yeq(const struct bbowda_problem *problem,
                       const void *p, const void *q)
{
  size_t i;
  double yeq1=0., yeq2=0.;
  // compute norm(p[yeq])^2 and norm(q[yeq])^2
  for (i=(size_t)(DIMX+DIMY); i<(size_t)(DIMX+DIMY+DIMY_EQ); i++) {
    yeq1+=i[(double*)p]*i[(double*)p];
    yeq2+=i[(double*)q]*i[(double*)q];
  }
  // compare them
  return (yeq1>yeq2)-(yeq1<yeq2);
}

static int compare_yeq_r(QSORT_R_ARGS)
{
  return compare_yeq((const struct bbowda_problem *)user_data,p,q);
}

void build_local_regcovar_model(struct covar *covar,
                                const struct bbowda_problem *problem,
                                struct eval_points *evalpts)
{
  if (DIMY_EQ) {
    if (covar->priv->usedpts_p) {
      /* make room for the new points in usedpts */
      covar->priv->usedpts_p=xrealloc(covar->priv->usedpts_p,evalpts->numcurrpts*((size_t)(DIMX+DIMY+DIMY_EQ))*sizeof(double));
      /* sort the new points by the norm of the equality constraint violation
         using insertion sort */
      size_t i,j;
      for (i=covar->priv->usedpts_size; i<evalpts->numcurrpts; i++) {
        /* This could be done more efficiently using binary search, but the
           memmove is O(n) anyway. We can't use bsearch as it only returns exact
           matches. */
        for (j=0; j<i; j++) {
          if (compare_yeq(problem,currpts(evalpts)[i],usedpts(covar->priv)[j])
              >=0) break;
        }
        memmove(usedpts(covar->priv)+j+1,usedpts(covar->priv)+j,(i-j)*((size_t)(DIMX+DIMY+DIMY_EQ))*sizeof(double));
        memcpy(usedpts(covar->priv)[j],currpts(evalpts)[i],((size_t)(DIMX+DIMY+DIMY_EQ))*sizeof(double));
      }
    } else {
      /* allocate a copy of currpts(evalpts) */
      covar->priv->usedpts_p=xmalloc(evalpts->numcurrpts*((size_t)(DIMX+DIMY+DIMY_EQ))*sizeof(double));
      memcpy(usedpts(covar->priv),currpts(evalpts),evalpts->numcurrpts*((size_t)(DIMX+DIMY+DIMY_EQ))*sizeof(double));
      /* sort the points by the norm of the equality constraint violation */
      QSORT_R(usedpts(covar->priv),evalpts->numcurrpts,(size_t)(DIMX+DIMY+DIMY_EQ)*sizeof(double),(void *)problem,compare_yeq_r);
    }
    covar->priv->usedpts_size=evalpts->numcurrpts;
    /* use only the first half unless we don't have enough points */
    covar->priv->numusedpts=(evalpts->numcurrpts>=28)?(evalpts->numcurrpts>>1):evalpts->numcurrpts;
  } else {
    covar->priv->numusedpts=evalpts->numcurrpts;
    covar->priv->usedpts_p=evalpts->currpts_p;
  }

  /* compute the covariance matrix */
  size_t i,j,k;
  double n=0., Cdiag[DIMX+DIMZ+DIMY+DIMY_EQ], d[covar->priv->numusedpts], dmax;
  for (k=0; k<(size_t)DIMX; k++) {
    covar->Xbar[k]=evalpts->best_point[k];
  }
  for (i=0; i<(size_t)DIMX; i++) {
    for (j=0; j<=i; j++) {
      covar->Xbar[k++]=evalpts->best_point[i]*evalpts->best_point[j];
    }
  }
  for (k=0; k<(size_t)(DIMY+DIMY_EQ); k++) {
    covar->Xbar[k+DIMX+DIMZ]=evalpts->best_point[k+DIMX];
  }
  for (i=0; i<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); i++) {
    for (j=0; j<=i; j++) {
      L(covar->priv)[i][j]=0.;
    }
  }
  for_each_used(covar,problem,evalpts,covar->Xbar,&n,compute_covar);
  for (i=0; i<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); i++) {
    for (j=0; j<=i; j++) {
      L(covar->priv)[i][j]/=n;
    }
  }

  /* save the diagonal of the matrix */
  for (j=0; j<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); j++) {
    Cdiag[j]=L(covar->priv)[j][j];
  }
  /* compute the regularized Cholesky factorization of the matrix */
  for (j=0; j<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); j++) {
    double Ljj;
    /* regularize */
    if (L(covar->priv)[j][j]<=sqrt(DBL_EPSILON)*Cdiag[j]) {
      L(covar->priv)[j][j]=(Cdiag[j]==0.)?1.:(sqrt(DBL_EPSILON)*Cdiag[j]);
    }
    /* in higher dimensions, regularize harder */
    if (DIMX > 6)
      L(covar->priv)[j][j]+=sqrt(DBL_EPSILON)*Cdiag[j];

    Ljj=L(covar->priv)[j][j];
    for (i=j+1; i<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); i++) {
      for (k=i; k<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); k++) {
        L(covar->priv)[k][i]-=L(covar->priv)[k][j]*L(covar->priv)[i][j]/Ljj;
      }
    }
    for (k=j; k<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); k++) {
      L(covar->priv)[k][j]/=sqrt(Ljj);
    }
  }
  /* compute the reversed transpose */
  for (i=0; i<(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ); i++) {
    for (j=0; j<=i; j++) {
      LTrev(covar->priv)[(size_t)(DIMX+DIMZ+DIMY+DIMY_EQ-1)-j][i]=L(covar->priv)[i][j];
    }
  }

  if (covar->priv->numusedpts<=((size_t)DIMX<<1)) {
    dmax=INFINITY;
  } else {
    /* compute distances from best point */
    for (j=0; j<covar->priv->numusedpts; j++) {
      d[j]=0.;
      for (i=0; i<(size_t)DIMX; i++) {
        d[j]+=(usedpts(covar->priv)[j][i]-evalpts->best_point[i])*(usedpts(covar->priv)[j][i]-evalpts->best_point[i]);
      }
    }
    /* sort them */
    qsort(d,covar->priv->numusedpts,sizeof(double),compare_double);
    /* take the 2*DIMX+1st entry as dmax, i.e. take at least 2*DIMX+1 points into
       account */
    dmax=d[DIMX<<1];
  }

  /* compute kup, klow */
  covar->kup=-INFINITY;
  covar->klow=INFINITY;
  for_each_used(covar,problem,evalpts,&dmax,NULL,compute_kup_klow);
  /* fudge kup for higher dimensions so we get reasonably-sized trust regions,
     otherwise the local search gets stuck in a non-optimal point */
  covar->kup *= pow(2.,(double)(DIMX-1));
}

void free_covar(struct covar *covar)
{
  if (covar->priv->usedpts_p_owned) { /* otherwise, usedpts_p is owned by struct eval_points */
    free(covar->priv->usedpts_p);
  }
  free(covar->priv->LTrev_p);
  free(covar->priv->L_p);
  free(covar->priv);
  free(covar->Xbar);
  free(covar);
}
