#!/usr/bin/env python3

import os
import random

def save(fn,x):
  """Store the string x in the file fn, skipping the write if fn already holds x.

  An existing file whose contents equal x is left untouched; otherwise fn is
  created or overwritten.
  """
  if os.path.exists(fn):
    with open(fn) as existing:
      if existing.read() == x:
        return
  with open(fn,'w') as out:
    out.write(x)

# ----- caching
# only for integer inputs
# and dictionary-of-string-to-list-of-integer outputs
# with no whitespace in string

class cached(object):
  """Decorator that memoizes a function of integers in files on disk.

  The wrapped function must take only int arguments and return a dict that
  maps whitespace-free strings to lists of integers.  The result for a call
  is stored in autogen/cached/<hex(arg)>/.../<function name>, one
  "key:v1,v2,..." line per dictionary entry, and is read back from there on
  later calls instead of calling the function again.

  The cache file is written under a temporary name and renamed into place,
  so a failed key check or an interrupted run never leaves behind a partial
  file that would later be read as a complete result.
  """
  def __init__(self,func):
    self.func = func
    self.name = func.__name__
  def __call__(self,*args):
    for x in args:
      assert isinstance(x,int)
    # one directory level per argument; the file is named after the function
    cachedir = 'autogen/cached/%s' % '/'.join(hex(x) for x in args)
    fn = '%s/%s' % (cachedir,self.name)
    if os.path.exists(fn):
      result = {}
      with open(fn) as f:
        for line in f:
          line = line.strip()
          s,y = line.split(':')
          y = [] if y == '' else y.split(',')
          result[s] = [int(yj) for yj in y]
      return result
    result = self.func(*args)
    os.makedirs(cachedir,exist_ok=True)
    # pid in the name keeps concurrent runs from sharing a temporary file
    tmp = '%s.tmp.%d' % (fn,os.getpid())
    try:
      with open(tmp,'w') as f:
        for s in result:
          # keys must be a single whitespace-free token
          assert s == s.split()[0]
          f.write('%s:%s\n' % (s,','.join(str(yj) for yj in result[s])))
      os.replace(tmp,fn)
    finally:
      # only still present if writing failed before the rename
      if os.path.exists(tmp):
        os.remove(tmp)
    return result

# ----- mathematical basics

# p: the prime 2^255-19; the arithmetic helpers below reduce modulo p
# two255: 2^255, used below to strip or test bit 255 of 256-bit inputs
p = 2**255-19
two255 = 2**255

def inv(x):
  """Return x^(p-2) mod p: the inverse of x modulo p, or 0 if x is 0 mod p."""
  exponent = p-2
  return pow(x,exponent,p)

def littleendian(x,bytes):
  """Return the low `bytes` bytes of x as a list, least significant first."""
  out = []
  for _ in range(bytes):
    out.append(x & 255)
    x >>= 8
  return out

def bit(x,i):
  """Return bit number i of x (0 or 1), counting from the least significant bit."""
  shifted = x>>i
  return shifted&1

def cswap(x,y,bit):
  """Return (y,x) if bit is 1 and (x,y) if bit is 0; any other bit is rejected."""
  assert bit in (0,1)
  return (y,x) if bit == 1 else (x,y)

# x-coordinate passed to montgomery() as the base point by
# nG_merged25519 and nG_montgomery25519
montbase = 9

def montgomery(x1,n): # copied from gfverif
  """Run a 255-step x-only ladder and return the resulting x-coordinate mod p.

  x1 is the input x-coordinate and n the scalar; only bits 254..0 of n are
  used, most significant first.  The pair (x2,z2) and the pair (x3,z3) are
  projective x-coordinates that are conditionally swapped around each step.
  The result is x2/z2 mod p (0 when z2 is 0 mod p).
  """
  A = 486662
  x2,z2 = 1,0
  x3,z3 = x1,1
  for i in range(254,-1,-1):
    ni = bit(n,i)
    x2,x3 = cswap(x2,x3,ni)
    z2,z3 = cswap(z2,z3,ni)
    # both updates are computed from the values before this step
    cross = x2*x3-z2*z3
    diff = x2*z3-z2*x3
    xx = x2**2
    zz = z2**2
    xz = x2*z2
    sumx = 4*cross**2
    sumz = 4*x1*diff**2
    dblx = (xx-zz)**2
    dblz = 4*xz*(xx+A*xz+zz)
    x3,z3 = sumx%p,sumz%p
    x2,z2 = dblx%p,dblz%p
    x2,x3 = cswap(x2,x3,ni)
    z2,z3 = cswap(z2,z3,ni)
  z2inv = pow(z2,p-2,p)
  return (x2*z2inv)%p

# d: the constant -121665/121666 mod p used in isoncurve() and edwards()
# sqrtm1: 2^((p-1)/4) mod p; decompress() multiplies its candidate root by
# this when the candidate's square does not match (the name suggests a
# square root of -1 mod p)
d = (-121665*inv(121666))%p
sqrtm1 = pow(2,(p-1)//4,p)

def isoncurve(P):
  """Return True if P = (x,y) satisfies y^2 - x^2 = 1 + d*x^2*y^2 modulo p."""
  x,y = P
  xx = x*x
  yy = y*y
  return (yy-xx-1-d*xx*yy) % p == 0

def decompress(y):
  """Return the curve point (x,y) with the given y (reduced mod p) and even x.

  Raises AssertionError when no x satisfies the curve equation for this y;
  callers rely on that to detect invalid encodings.
  """
  y %= p
  yy = y*y
  xsq = (yy-1)*inv(d*yy+1)
  # candidate square root; if it is off, try the candidate times sqrtm1
  root = pow(xsq,(p+3)//8,p)
  if (root*root-xsq)%p != 0:
    root *= sqrtm1
  assert (root*root-xsq)%p == 0
  root %= p
  # of the two roots root and p-root, pick the even one
  x = p-root if root&1 else root
  point = (x,y)
  assert isoncurve(point)
  return point

# edbase: the point with y = 4/5 mod p and even x (decompress always picks
# the even x); used as the base point G by the functions below
# point10, point26: the points decompressed from y = 10 and y = 26;
# precompute() uses point10 as a known-valid input, and the ed25519 functions
# substitute point26 when the input point is invalid
edbase = decompress(4*inv(5))
assert edbase[0]%2 == 0
point10 = decompress(10)
point26 = decompress(26)

def edwards(P1,P2):
  """Return the sum of the curve points P1 and P2, with coordinates reduced mod p.

  Both inputs and the output are checked with isoncurve().
  """
  assert isoncurve(P1)
  assert isoncurve(P2)
  (x1,y1),(x2,y2) = P1,P2
  # the term shared by both denominators
  t = d*x1*x2*y1*y2
  x3 = (x1*y2+y1*x2) * inv(1+t)
  y3 = (y1*y2+x1*x2) * inv(1-t)
  P3 = (x3%p,y3%p)
  assert isoncurve(P3)
  return P3

def scalarmult(P,n):
  """Return n times the curve point P for an integer n >= 0.

  n = 0 gives (0,1) and n = 1 gives P itself.  Otherwise P is doubled once
  per bit of n below the top bit, and the doubled points belonging to set
  low bits are added in afterwards, most recently saved first.
  """
  assert n >= 0
  assert isoncurve(P)
  if n == 0: return 0,1
  pending = []
  while n > 1:
    if n & 1: pending.append(P)
    P = edwards(P,P)
    n //= 2
  Q = P
  for addend in reversed(pending):
    Q = edwards(addend,Q)
  assert isoncurve(Q)
  return Q

# ----- the mathematical primitives

@cached
def pow_inv25519(x):
  """Test vector for pow/inv25519: invert the low 255 bits of x modulo p.

  Returns {'q': 32 little-endian bytes of the inverse,
           'p': 32 little-endian bytes of x as given}.
  """
  assert 0 <= x < 2**256
  y = inv(x % two255)
  assert 0 <= y < p
  qbytes = littleendian(y,32)
  pbytes = littleendian(x,32)
  return {'q':qbytes,'p':pbytes}

@cached
def nP_montgomery25519(n,P):
  """Test vector for nP/montgomery25519.

  The scalar is n with bit 255 dropped, bit 254 forced on and the low three
  bits cleared; the point is P with bit 255 dropped.  Returns the 32-byte
  little-endian encodings of the result ('q') and of n and P as given.
  """
  assert 0 <= n < 2**256
  assert 0 <= P < 2**256
  m = ((n % two255) | 2**254) & ~7
  assert 2**254 <= m <= 2**254 + 8*(2**251-1)
  assert m%8 == 0
  mP = montgomery(P % two255,m)
  assert 0 <= mP < p
  return {'q':littleendian(mP,32),'n':littleendian(n,32),'p':littleendian(P,32)}

@cached
def nG_merged25519(n):
  """Test vector for nG/merged25519.

  If bit 255 of n is set, the output is montgomery(montbase, n - 2^255).
  Otherwise the output is the y-coordinate of n*edbase, with bit 255 set
  when the x-coordinate is odd.  Returns the 32-byte little-endian encodings
  of the output ('q') and of n as given.
  """
  assert 0 <= n < 2**256
  if n&two255:
    nG = montgomery(montbase,n-two255)
    assert 0 <= nG < p
  else:
    x,y = scalarmult(edbase,n)
    assert 0 <= y < p
    nG = y|two255 if x&1 else y
  return {'q':littleendian(nG,32),'n':littleendian(n,32)}

@cached
def nG_montgomery25519(n):
  """Test vector for nG/montgomery25519: the nP vector for the point montbase,
  with the point entry removed.
  """
  full = nP_montgomery25519(n,montbase)
  assert full['p'] == [9]+[0]*31
  return {key:full[key] for key in ('q','n')}

# modulus used to reduce scalars in mGnP_ed25519 and multiscalar_ed25519
# (the name suggests the prime order of the base point; not checked here)
primeorder = 2**252+27742317777372353535851937790883648493

@cached
def mGnP_ed25519(m,n,P):
  """Test vector for mGnP/ed25519: encode m*edbase - n*P.

  m is a 256-bit scalar, n a 512-bit scalar, and P a 256-bit point encoding
  (y in the low 255 bits, parity of x in bit 255).  The output 'Q' is the
  32-byte encoding of the result followed by an ok byte; ok is 0 when
  m >= primeorder or when P does not decode, in which case point26 is used
  in place of P.  'm', 'n', 'P' are the little-endian bytes of the inputs
  as given.
  """
  assert 0 <= m < 2**256
  assert 0 <= n < 2**512
  assert 0 <= P < 2**256

  # encode the inputs before the scalars are reduced
  mrep = littleendian(m,32)
  nrep = littleendian(n,64)
  Prep = littleendian(P,32)

  ok = 0 if m >= primeorder else 1
  m %= primeorder
  n %= primeorder

  parity = P>>255
  y = P & ~two255
  try:
    # assertion failures here are for invalid P
    assert y < p
    x,y = decompress(y)
    if parity: x = (-x)%p
    assert x&1 == parity
  except AssertionError:
    ok = 0
    x,y = point26
    x = (-x)%p # negative of point26

  assert 0 <= x < p
  assert 0 <= y < p

  # mG-n(x,y) is computed as mG+n(-x,y)
  negated = ((-x)%p,y)
  Qx,Qy = edwards(scalarmult(edbase,m),scalarmult(negated,n))

  assert 0 <= Qx < p
  assert 0 <= Qy < p
  # bit 255 of the encoding carries the parity of Qx
  if Qx&1: Qy += two255
  return {'Q':littleendian(Qy,32)+[ok],'m':mrep,'n':nrep,'P':Prep}

@cached
def multiscalar_ed25519(*nlistPlist):
  """Test vector for multiscalar/ed25519: encode the sum of n[j]*P[j].

  The arguments are all the scalars followed by the same number of point
  encodings (y in the low 255 bits, parity of x in bit 255).  The output 'Q'
  is the 32-byte encoding of the sum followed by an ok byte; ok is 0 when
  any scalar is >= primeorder or any point does not decode, in which case
  point26 is used in its place.  'n' and 'P' are the concatenated 32-byte
  little-endian encodings of the inputs as given.
  """
  half = len(nlistPlist)//2
  n = nlistPlist[:half]
  P = nlistPlist[half:]
  assert len(n) == len(P)

  Q = (0,1)
  ok = 1
  nrep = []
  Prep = []
  for nj,Pj in zip(n,P):
    assert 0 <= nj < 2**256
    nrep.extend(littleendian(nj,32))

    assert 0 <= Pj < 2**256
    Prep.extend(littleendian(Pj,32))

    if nj >= primeorder: ok = 0

    parity = Pj>>255
    y = Pj & ~two255
    try:
      # assertion failures here are for invalid P
      assert y < p
      x,y = decompress(y)
      if parity: x = (-x)%p
      assert x&1 == parity
    except AssertionError:
      ok = 0
      x,y = point26

    term = scalarmult((x,y),nj % primeorder)
    Q = edwards(Q,term)

  Qx,Qy = Q
  assert 0 <= Qx < p
  assert 0 <= Qy < p
  # bit 255 of the encoding carries the parity of Qx
  if Qx&1: Qy += two255
  return {'Q':littleendian(Qy,32)+[ok],'n':nrep,'P':Prep}

# ----- precomputed test vectors

# maps (operation, primitive) to the list of test-vector dicts built by
# precompute()
precomputed = {}

def precompute():
  """Fill `precomputed` with the test vectors for each (operation, primitive).

  Every group reseeds `random` with a fixed string and then draws its inputs,
  so the chosen inputs depend on the exact order of the random calls below;
  reordering or inserting calls changes the vectors.  The single-input
  vectors come from the @cached functions above; the batch vectors
  ('powbatch', 'nPbatch') are concatenations of randomly chosen single-input
  results and carry the batch size under the key 'batch'.
  """
  global precomputed

  # the list of small-order montgomery x-coordinates from the curve25519 page:
  corners = [
    0,
    1,
    325606250916557431795983626356110631294008115727848805560023387167927233504,
    39382357235489614581723060781553021112529911719440698176882885853963445705823,
    2**255 - 19 - 1,
    2**255 - 19,
    2**255 - 19 + 1,
    2**255 - 19 + 325606250916557431795983626356110631294008115727848805560023387167927233504,
    2**255 - 19 + 39382357235489614581723060781553021112529911719440698176882885853963445705823,
    2*(2**255 - 19) - 1,
    2*(2**255 - 19),
    2*(2**255 - 19) + 1,
  ]
  # and some edwards y-coordinates of order 8:
  corners += [
    2707385501144840649318225287225658788936804267575313519463743609750303402022,
    55188659117513257062467267217118295137698188065244968500265048394206261417927,
    2**255 - 19 + 2707385501144840649318225287225658788936804267575313519463743609750303402022,
    2**255 - 19 + 55188659117513257062467267217118295137698188065244968500265048394206261417927,
  ]
  # and all small numbers mod 2**255-19 and mod 2**255:
  corners += list(range(32))
  corners += list(range(2**255-32,2**255+32))
  corners += list(range(2**256-64,2**256))

  # pow: every corner case plus 128 random 256-bit values, deduplicated
  random.seed('pow_inv25519')
  T = corners + [random.randrange(2**256) for loop in range(128)]
  results = [pow_inv25519(x) for x in sorted(set(T))]
  precomputed['pow','inv25519'] = results

  # reuses results from pow
  # (10 batches for each batch size 0..16)
  random.seed('powbatch_inv25519')
  batchresults = []
  for batch in range(0,17):
    for loop in range(10):
      batchq = []
      batchp = []
      for j in range(batch):
        pos = random.randrange(len(results))
        batchq += results[pos]['q']
        batchp += results[pos]['p']
      batchresults += [{'q':batchq,'p':batchp,'batch':batch}]
  precomputed['powbatch','inv25519'] = batchresults

  # nP: a random scalar for each corner/random point, then scalars within 2
  # of the first 8 multiples of primeorder (with and without bit 255 set)
  # applied to the point 16
  random.seed('nP_montgomery25519')
  T = corners + [random.randrange(2**256) for loop in range(128)]
  inputs = [(random.randrange(2**256),P) for P in sorted(set(T))]
  for nmult in range(8):
    for n in range(nmult*primeorder-2,nmult*primeorder+3):
      if n < 0: continue
      inputs += [(n,16)]
      inputs += [(n|two255,16)]
  results = [nP_montgomery25519(*x) for x in inputs]
  precomputed['nP','montgomery25519'] = results

  # reuses results from nP
  # (one batch for each batch size 0..16)
  random.seed('nPbatch_montgomery25519')
  batchresults = []
  for batch in range(0,17):
    batchq = []
    batchn = []
    batchp = []
    for j in range(batch):
      pos = random.randrange(len(results))
      batchq += results[pos]['q']
      batchn += results[pos]['n']
      batchp += results[pos]['p']
    batchresults += [{'q':batchq,'n':batchn,'p':batchp,'batch':batch}]
  precomputed['nPbatch','montgomery25519'] = batchresults

  # nG (merged): 128 random scalars, then scalars near multiples of primeorder
  random.seed('nG_merged25519')
  results = [nG_merged25519(random.randrange(2**256)) for loop in range(128)]
  for nmult in range(8):
    for n in range(nmult*primeorder-2,nmult*primeorder+3):
      if n < 0: continue
      results += [nG_merged25519(n)]
      results += [nG_merged25519(n|two255)]
  precomputed['nG','merged25519'] = results

  # nG (montgomery): same input pattern as the merged case
  random.seed('nG_montgomery25519')
  results = [nG_montgomery25519(random.randrange(2**256)) for loop in range(128)]
  for nmult in range(8):
    for n in range(nmult*primeorder-2,nmult*primeorder+3):
      if n < 0: continue
      results += [nG_montgomery25519(n)]
      results += [nG_montgomery25519(n|two255)]
  precomputed['nG','montgomery25519'] = results

  # mGnP: random scalars for each corner/random point encoding, then m and n
  # in turn near multiples of primeorder with the valid point point10, then
  # the all-zero scalar case
  random.seed('mGnP_ed25519')
  results = []
  T = corners + [random.randrange(2**256) for loop in range(128)]
  for P in sorted(set(T)):
    m = random.randrange(2**256)
    n = random.randrange(2**512)
    results += [mGnP_ed25519(m,n,P)]
  for mmult in range(16):
    for m in range(mmult*primeorder-2,mmult*primeorder+3):
      if m < 0: continue
      n = random.randrange(2**512)
      results += [mGnP_ed25519(m,n,point10[1])]
  for nmult in range(16):
    for n in range(nmult*primeorder-2,nmult*primeorder+3):
      if n < 0: continue
      m = random.randrange(2**256)
      results += [mGnP_ed25519(m,n,point10[1])]
  results += [mGnP_ed25519(0,0,point10[1])]
  precomputed['mGnP','ed25519'] = results

  # multiscalar: U is a pool of (scalar, point encoding) pairs; each vector
  # of size 0..32 draws its pairs from U with replacement
  random.seed('multiscalar_ed25519')
  T = corners + [random.randrange(2**256) for loop in range(128)]
  U = [(random.randrange(2**256),P) for P in sorted(set(T))]
  for nmult in range(16):
    for n in range(nmult*primeorder-2,nmult*primeorder+3):
      if n < 0: continue
      U += [(n,point10[1])]
  results = []
  for multi in range(0,33):
    nlist = []
    Plist = []
    for j in range(multi):
      pos = random.randrange(len(U))
      nlist += [U[pos][0]]
      Plist += [U[pos][1]]
    result = multiscalar_ed25519(*(nlist+Plist))
    result['batch'] = multi
    results += [result]
  precomputed['multiscalar','ed25519'] = results

# runs at import time, before the module-level name p is rebound further down
precompute()

# ----- generating test program

# H collects pieces of generated C header text (presumably written out as
# lib25519_test.h, the name used by the include guard here and by the
# #include in Z -- the writing code is not in this part of the file).
# This first piece opens the include guard, maps the short helper names to
# lib25519_test_-prefixed symbols, and declares the shared globals and helper
# functions.  The trailing backslash after ''' keeps the text from starting
# with a blank line.  The closing #endif is not part of this piece.
H = ['''\
#ifndef lib25519_test_h
#define lib25519_test_h

#define aligned lib25519_test_aligned
#define callocplus lib25519_test_callocplus
#define checksum lib25519_test_checksum
#define checksum_clear lib25519_test_checksum_clear
#define checksum_expected lib25519_test_checksum_expected
#define double_canary lib25519_test_double_canary
#define endianness lib25519_test_endianness
#define forked lib25519_test_forked
#define input_compare lib25519_test_input_compare
#define input_prepare lib25519_test_input_prepare
#define myrandom lib25519_test_myrandom
#define ok lib25519_test_ok
#define output_compare lib25519_test_output_compare
#define output_prepare lib25519_test_output_prepare
#define public lib25519_test_public
#define secret lib25519_test_secret
#define targeti lib25519_test_targeti
#define targetn lib25519_test_targetn
#define targetoffset lib25519_test_targetoffset
#define targeto lib25519_test_targeto
#define targetp lib25519_test_targetp
#define valgrind lib25519_test_valgrind

extern const char *targeto;
extern const char *targetp;
extern const char *targeti;
extern const char *targetn;
extern const char *targetoffset;
extern int ok;
extern int valgrind;

extern unsigned long long myrandom(void);
extern void forked(void (*)(long long),long long);
extern void *aligned(void *,long long);
extern void *callocplus(long long);
extern void secret(void *,long long);
extern void public(void *,long long);
extern void double_canary(unsigned char *,unsigned char *,unsigned long long);
extern void input_prepare(unsigned char *,unsigned char *,unsigned long long);
extern void output_prepare(unsigned char *,unsigned char *,unsigned long long);
extern void input_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
extern void output_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
extern void checksum_expected(const char *);
extern void checksum(const unsigned char *,unsigned long long);
extern void checksum_clear(void);
extern void endianness(unsigned char *,unsigned long long,unsigned long long);

''']

# Z collects pieces of the generated C test program.  This first piece is a
# raw string (backslashes such as \n in the C code pass through unchanged)
# holding the fixed support code:
#   - target-selection globals and the fail() macro, which clears ok
#   - valgrind support: secret() XORs data with valgrind_undefined_byte,
#     public() calls crypto_declassify
#   - salsa20-based generators for test-vector inputs and canary padding
#   - knownrandombytes, a chacha20-based replacement for randombytes
#   - the checksum state and its comparison against an expected hex string
#   - callocplus()/aligned() allocation helpers
#   - forked(), which runs one test in a child process unless under valgrind
Z = [r'''/* WARNING: auto-generated (by autogen/test); do not edit */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <assert.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <sys/resource.h>
#include "crypto_uint8.h"
#include "crypto_uint32.h"
#include "crypto_uint64.h"
#include "crypto_declassify.h"
#include <lib25519.h> /* -l25519 */
#include <randombytes.h>
#include "lib25519_test.h"

const char *targeto = 0;
const char *targetp = 0;
const char *targeti = 0;
const char *targetn = 0;
const char *targetoffset = 0;

int ok = 1;

#define fail ((ok = 0),printf)

/* ----- valgrind support */

int valgrind = 0;
static unsigned char valgrind_undefined_byte = 0;
static char *volatile valgrind_pointer = 0;

static char *valgrind_malloc_1(void)
{
  char *x = malloc(1);
  if (!x) abort();
  *(char **volatile) &valgrind_pointer = x;
  return valgrind_pointer;
}

static void valgrind_init(void)
{
  char *e = getenv("valgrind_multiplier");
  char *x;
  if (!e) return;
  x = valgrind_malloc_1();
  valgrind_undefined_byte = x[0]+1;
  valgrind_undefined_byte *= atoi(e);
  valgrind_undefined_byte ^= x[0]+1;
  free(x);
  valgrind = 1;
}

void secret(void *xvoid,long long xlen)
{
  unsigned char *x = xvoid;
  while (xlen > 0) {
    *x ^= valgrind_undefined_byte;
    ++x;
    --xlen;
  }
}

void public(void *x,long long xlen)
{
  crypto_declassify(x,xlen);
}

/* ----- rng and hash, from supercop/try-anything.c */

typedef crypto_uint8 u8;
typedef crypto_uint32 u32;
typedef crypto_uint64 u64;

#define FOR(i,n) for (i = 0;i < n;++i)

static u32 L32(u32 x,int c) { return (x << c) | ((x&0xffffffff) >> (32 - c)); }

static u32 ld32(const u8 *x)
{
  u32 u = x[3];
  u = (u<<8)|x[2];
  u = (u<<8)|x[1];
  return (u<<8)|x[0];
}

static void st32(u8 *x,u32 u)
{
  int i;
  FOR(i,4) { x[i] = u; u >>= 8; }
}

static const u8 sigma[17] = "expand 32-byte k";

static void core_salsa(u8 *out,const u8 *in,const u8 *k)
{
  u32 w[16],x[16],y[16],t[4];
  int i,j,m;

  FOR(i,4) {
    x[5*i] = ld32(sigma+4*i);
    x[1+i] = ld32(k+4*i);
    x[6+i] = ld32(in+4*i);
    x[11+i] = ld32(k+16+4*i);
  }

  FOR(i,16) y[i] = x[i];

  FOR(i,20) {
    FOR(j,4) {
      FOR(m,4) t[m] = x[(5*j+4*m)%16];
      t[1] ^= L32(t[0]+t[3], 7);
      t[2] ^= L32(t[1]+t[0], 9);
      t[3] ^= L32(t[2]+t[1],13);
      t[0] ^= L32(t[3]+t[2],18);
      FOR(m,4) w[4*j+(j+m)%4] = t[m];
    }
    FOR(m,16) x[m] = w[m];
  }

  FOR(i,16) st32(out + 4 * i,x[i] + y[i]);
}

static void salsa20(u8 *c,u64 b,const u8 *n,const u8 *k)
{
  u8 z[16],x[64];
  u32 u,i;
  if (!b) return;
  FOR(i,16) z[i] = 0;
  FOR(i,8) z[i] = n[i];
  while (b >= 64) {
    core_salsa(x,z,k);
    FOR(i,64) c[i] = x[i];
    u = 1;
    for (i = 8;i < 16;++i) {
      u += (u32) z[i];
      z[i] = u;
      u >>= 8;
    }
    b -= 64;
    c += 64;
  }
  if (b) {
    core_salsa(x,z,k);
    FOR(i,b) c[i] = x[i];
  }
}

static void increment(u8 *n)
{
  if (!++n[0])
    if (!++n[1])
      if (!++n[2])
        if (!++n[3])
          if (!++n[4])
            if (!++n[5])
              if (!++n[6])
                if (!++n[7])
                  ;
}

static unsigned char testvector_n[8];

static void testvector_clear(void)
{
  memset(testvector_n,0,sizeof testvector_n);
}

static void testvector(unsigned char *x,unsigned long long xlen)
{
  const static unsigned char testvector_k[33] = "generate inputs for test vectors";
  salsa20(x,xlen,testvector_n,testvector_k);
  increment(testvector_n);
}

unsigned long long myrandom(void)
{
  unsigned char x[8];
  unsigned long long result;
  testvector(x,8);
  result = x[7];
  result = (result<<8)|x[6];
  result = (result<<8)|x[5];
  result = (result<<8)|x[4];
  result = (result<<8)|x[3];
  result = (result<<8)|x[2];
  result = (result<<8)|x[1];
  result = (result<<8)|x[0];
  return result;
}

static unsigned char canary_n[8];

static void canary(unsigned char *x,unsigned long long xlen)
{
  const static unsigned char canary_k[33] = "generate pad to catch overwrites";
  salsa20(x,xlen,canary_n,canary_k);
  increment(canary_n);
}

void double_canary(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  if (valgrind) return;
  canary(x - 16,16);
  canary(x + xlen,16);
  memcpy(x2 - 16,x - 16,16);
  memcpy(x2 + xlen,x + xlen,16);
}

void input_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  testvector(x,xlen);
  if (valgrind) {
    memcpy(x2,x,xlen);
    return;
  }
  canary(x - 16,16);
  canary(x + xlen,16);
  memcpy(x2 - 16,x - 16,xlen + 32);
}

void input_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
{
  if (valgrind) return;
  if (memcmp(x2 - 16,x - 16,xlen + 32)) {
    fail("failure: %s overwrites input\n",fun);
  }
}

void output_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  if (valgrind) {
    memcpy(x2,x,xlen);
    return;
  }
  canary(x - 16,xlen + 32);
  memcpy(x2 - 16,x - 16,xlen + 32);
}

void output_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
{
  if (valgrind) return;
  if (memcmp(x2 - 16,x - 16,16)) {
    fail("failure: %s writes before output\n",fun);
  }
  if (memcmp(x2 + xlen,x + xlen,16)) {
    fail("failure: %s writes after output\n",fun);
  }
}

/* ----- knownrandombytes */

static const int knownrandombytes_is_only_for_testing_not_for_cryptographic_use = 1;
#define knownrandombytes randombytes

#define QUARTERROUND(a,b,c,d) \
  a += b; d = L32(d^a,16); \
  c += d; b = L32(b^c,12); \
  a += b; d = L32(d^a, 8); \
  c += d; b = L32(b^c, 7);

static void core_chacha(u8 *out,const u8 *in,const u8 *k)
{
  u32 x[16],y[16];
  int i,j;
  FOR(i,4) {
    x[i] = ld32(sigma+4*i);
    x[12+i] = ld32(in+4*i);
  }
  FOR(i,8) x[4+i] = ld32(k+4*i);
  FOR(i,16) y[i] = x[i];
  FOR(i,10) {
    FOR(j,4) { QUARTERROUND(x[j],x[j+4],x[j+8],x[j+12]) }
    FOR(j,4) { QUARTERROUND(x[j],x[((j+1)&3)+4],x[((j+2)&3)+8],x[((j+3)&3)+12]) }
  }
  FOR(i,16) st32(out+4*i,x[i]+y[i]);
}

static void chacha20(u8 *c,u64 b,const u8 *n,const u8 *k)
{
  u8 z[16],x[64];
  u32 u,i;
  if (!b) return;
  FOR(i,16) z[i] = 0;
  FOR(i,8) z[i+8] = n[i];
  while (b >= 64) {
    core_chacha(x,z,k);
    FOR(i,64) c[i] = x[i];
    u = 1;
    FOR(i,8) {
      u += (u32) z[i];
      z[i] = u;
      u >>= 8;
    }
    b -= 64;
    c += 64;
  }
  if (b) {
    core_chacha(x,z,k);
    FOR(i,b) c[i] = x[i];
  }
}

#define crypto_rng_OUTPUTBYTES 736

static int crypto_rng(
        unsigned char *r, /* random output */
        unsigned char *n, /* new key */
  const unsigned char *g  /* old key */
)
{
  static const unsigned char nonce[8] = {0};
  unsigned char x[32+crypto_rng_OUTPUTBYTES];
  chacha20(x,sizeof x,nonce,g);
  memcpy(n,x,32);
  memcpy(r,x+32,crypto_rng_OUTPUTBYTES);
  return 0;
}

static unsigned char knownrandombytes_g[32];
static unsigned char knownrandombytes_r[crypto_rng_OUTPUTBYTES];
static unsigned long long knownrandombytes_pos = crypto_rng_OUTPUTBYTES;

static void knownrandombytes_clear(void)
{
  memset(knownrandombytes_g,0,sizeof knownrandombytes_g);
  memset(knownrandombytes_r,0,sizeof knownrandombytes_r);
  knownrandombytes_pos = crypto_rng_OUTPUTBYTES;
}

void knownrandombytes_main(void *xvoid,long long xlen)
{
  unsigned char *x = xvoid;
  assert(knownrandombytes_is_only_for_testing_not_for_cryptographic_use);

  while (xlen > 0) {
    if (knownrandombytes_pos == crypto_rng_OUTPUTBYTES) {
      crypto_rng(knownrandombytes_r,knownrandombytes_g,knownrandombytes_g);
      knownrandombytes_pos = 0;
    }
    *x++ = knownrandombytes_r[knownrandombytes_pos];
    xlen -= 1;
    knownrandombytes_r[knownrandombytes_pos++] = 0;
  }
}

void knownrandombytes(void *xvoid,long long xlen)
{
  knownrandombytes_main(xvoid,xlen);
  secret(xvoid,xlen);
}

/* ----- checksums */

static unsigned char checksum_state[64];
static char checksum_hex[65];

void checksum_expected(const char *expected)
{
  long long i;
  for (i = 0;i < 32;++i) {
    checksum_hex[2 * i] = "0123456789abcdef"[15 & (checksum_state[i] >> 4)];
    checksum_hex[2 * i + 1] = "0123456789abcdef"[15 & checksum_state[i]];
  }
  checksum_hex[2 * i] = 0;

  if (strcmp(checksum_hex,expected))
    fail("failure: checksum mismatch: %s expected %s\n",checksum_hex,expected);
}

void checksum_clear(void)
{
  memset(checksum_state,0,sizeof checksum_state);
  knownrandombytes_clear();
  testvector_clear();
  /* not necessary to clear canary */
}

void checksum(const unsigned char *x,unsigned long long xlen)
{
  u8 block[16];
  int i;
  while (xlen >= 16) {
    core_salsa(checksum_state,x,checksum_state);
    x += 16;
    xlen -= 16;
  }
  FOR(i,16) block[i] = 0;
  FOR(i,xlen) block[i] = x[i];
  block[xlen] = 1;
  checksum_state[0] ^= 1;
  core_salsa(checksum_state,block,checksum_state);
}

#include "limits.inc"

void *callocplus(long long len)
{
  if (valgrind) {
    unsigned char *x = malloc(len);
    if (!x) abort();
    return x;
  } else {
    unsigned char *x = calloc(1,len + 256);
    long long i;
    if (!x) abort();
    for (i = 0;i < len + 256;++i) x[i] = random();
    return x;
  }
}

void *aligned(void *x,long long len)
{
  if (valgrind)
    return x;
  else {
    long long i;
    unsigned char *y = x;
    y += 64;
    y += 63 & (-(unsigned long) y);
    for (i = 0;i < len;++i) y[i] = 0;
    return y;
  }
}

/* ----- catching SIGILL, SIGBUS, SIGSEGV, etc. */

void forked(void (*test)(long long),long long impl)
{
  if (valgrind) {
    test(impl);
    return;
  }
  fflush(stdout);
  pid_t child = fork();
  int childstatus = -1;
  if (child == -1) {
    fprintf(stderr,"fatal: fork failed: %s",strerror(errno));
    exit(111);
  }
  if (child == 0) {
    ok = 1;
    limits();
    test(impl);
    if (!ok) exit(100);
    exit(0);
  }
  if (waitpid(child,&childstatus,0) != child) {
    fprintf(stderr,"fatal: wait failed: %s",strerror(errno));
    exit(111);
  }
  if (childstatus)
    fail("failure: process failed, status %d\n",childstatus);
  fflush(stdout);
}

''']

# ==========

# tables filled from the file 'api'; o is an operation name, p a primitive name
checksums = {}   # (o,p) -> (second field, third field) of a 'crypto_' line
operations = []  # operation names, in order of first appearance
primitives = {}  # o -> list of primitive names
sizes = {}       # (o,p) -> the '#define ' lines for that pair, concatenated
exports = {}     # o -> function names with the leading 'crypto_' removed
prototypes = {}  # o -> list of (return type, name split on '_', argument text)

# NOTE(review): this loop rebinds the module-level name p, which held the
# prime 2**255-19 until here; the arithmetic helpers that read p cannot be
# called after this point.
with open('api') as f:
  for line in f:
    line = line.strip()
    if line.startswith('crypto_'):
      # crypto_<o>/<p>, optionally followed by two more fields
      line = line.split()
      x = line[0].split('/')
      assert len(x) == 2
      o = x[0].split('_')[1]
      if o not in operations: operations.append(o)
      p = x[1]
      primitives.setdefault(o,[]).append(p)
      if len(line) >= 3:
        checksums[o,p] = (line[1],line[2])
    elif line.startswith('#define '):
      # the macro name must have four '_'-separated parts: crypto_<o>_<p>_<what>
      x = line.split(' ')[1].split('_')
      assert len(x) == 4
      assert x[0] == 'crypto'
      o,p = x[1],x[2]
      sizes[o,p] = sizes.get((o,p),'') + line+'\n'
    elif line.endswith(');'):
      # a prototype: '<rettype> crypto_<o>...(<args>);'
      fun,args = line[:-2].split('(')
      rettype,fun = fun.split()
      fun = fun.split('_')
      o = fun[1]
      assert fun[0] == 'crypto'
      exports.setdefault(o,[]).append('_'.join(fun[1:]))
      prototypes.setdefault(o,[]).append((rettype,fun,args))

# ========== verify

# section heading in the generated C program
Z += [r'''
/* ----- verify, derived from supercop/crypto_verify/try.c */
''']

# One copy of the verify test code is appended per primitive listed under
# 'verify' in api.  Every occurrence of the placeholder BYTES is replaced by
# the primitive name p, so p has to be usable both inside C identifiers
# (crypto_verify_BYTES) and as a numeric expression (myrandom() % BYTES).
# NOTE(review): the loop variable rebinds the module-level name p.
for p in primitives['verify']:
  Z += [r'''
static int (*crypto_verify_BYTES)(const unsigned char *,const unsigned char *);

static unsigned char *test_verify_BYTES_x;
static unsigned char *test_verify_BYTES_y;

static void test_verify_BYTES_check(void)
{
  unsigned char *x = test_verify_BYTES_x;
  unsigned char *y = test_verify_BYTES_y;
  int r;

  secret(x,BYTES);
  secret(y,BYTES);
  r = crypto_verify_BYTES(x,y);
  public(x,BYTES);
  public(y,BYTES);
  public(&r,sizeof r);

  if (r == 0) {
    if (memcmp(x,y,BYTES))
      fail("failure: different strings pass verify\n");
  } else if (r == -1) {
    if (!memcmp(x,y,BYTES))
      fail("failure: equal strings fail verify\n");
  } else {
    fail("failure: weird return value\n");
  }
}

void test_verify_BYTES_impl(long long impl)
{
  unsigned char *x = test_verify_BYTES_x;
  unsigned char *y = test_verify_BYTES_y;

  if (targeti && strcmp(targeti,".") && strcmp(targeti,lib25519_dispatch_verify_BYTES_implementation(impl))) return;
  if (targetn && atol(targetn) != impl) return;
  if (impl >= 0) {
    crypto_verify_BYTES = lib25519_dispatch_verify_BYTES(impl);
    printf("verify_BYTES %lld implementation %s compiler %s\n",impl,lib25519_dispatch_verify_BYTES_implementation(impl),lib25519_dispatch_verify_BYTES_compiler(impl));
  } else {
    crypto_verify_BYTES = lib25519_verify_BYTES;
    printf("verify_BYTES selected implementation %s compiler %s\n",lib25519_verify_BYTES_implementation(),lib25519_verify_BYTES_compiler());
  }

  randombytes(x,BYTES);
  randombytes(y,BYTES);
  test_verify_BYTES_check();
  memcpy(y,x,BYTES);
  test_verify_BYTES_check();
  y[myrandom() % BYTES] = myrandom();
  test_verify_BYTES_check();
  y[myrandom() % BYTES] = myrandom();
  test_verify_BYTES_check();
  y[myrandom() % BYTES] = myrandom();
  test_verify_BYTES_check();
}

static void test_verify_BYTES(void)
{
  if (targeto && strcmp(targeto,"verify")) return;
  if (targetp && strcmp(targetp,"BYTES")) return;

  test_verify_BYTES_x = callocplus(BYTES);
  test_verify_BYTES_y = callocplus(BYTES);

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    printf("verify_BYTES offset %lld\n",offset);
    for (long long impl = -1;impl < lib25519_numimpl_verify_BYTES();++impl)
      forked(test_verify_BYTES_impl,impl);
    ++test_verify_BYTES_x;
    ++test_verify_BYTES_y;
  }
}
'''.replace('BYTES',p)]

# ==========

# Table driving the per-primitive test generator below.
# Each entry is (operation, variables, howmuch, tests) where
#   variables: (name, initial C length expression or None, C allocation size)
#              None means the length is assigned later by the generated code
#   howmuch:   (C variable name, value for the small pass, value for the big pass)
#   tests:     (function-name suffix, outputs, in/out arguments, inputs)

# Every dh/sign buffer uses the same allocation expression.
_dh_alloc = 'crypto_dh_BYTES+crypto_dh_PUBLICKEYBYTES+crypto_dh_SECRETKEYBYTES'
_sign_alloc = '4096+crypto_sign_BYTES+crypto_sign_PUBLICKEYBYTES+crypto_sign_SECRETKEYBYTES'

todo = (
  (
    'hashblocks',
    (
      ('h','crypto_hashblocks_STATEBYTES','crypto_hashblocks_STATEBYTES'),
      ('m',None,'4096'),
    ),
    (
      ('loops','4096','32768'),
      ('maxtest','128','4096'),
    ),
    (
      ('',(),('h',),('m','mlen')),
    ),
  ),
  (
    'hash',
    (
      ('h','crypto_hash_BYTES','crypto_hash_BYTES'),
      ('m',None,'4096+crypto_hash_BYTES'),
    ),
    (
      ('loops','64','512'),
      ('maxtest','128','4096'),
    ),
    (
      ('',('h',),(),('m','mlen')),
    ),
  ),
  (
    'pow',
    (
      ('q','crypto_pow_BYTES','crypto_pow_BYTES'),
      ('p','crypto_pow_BYTES','crypto_pow_BYTES'),
    ),
    (
      ('loops','64','512'),
    ),
    (
      ('',('q',),(),('p',)),
    ),
  ),
  (
    'powbatch',
    (
      ('q',None,'128*crypto_powbatch_BYTES'),
      ('p',None,'128*crypto_powbatch_BYTES'),
    ),
    (
      ('loops','64','512'),
      ('maxtest','16','128'),
    ),
    (
      ('',('q',),(),('p','mlen')),
    ),
  ),
  (
    'nP',
    (
      ('q','crypto_nP_POINTBYTES','crypto_nP_POINTBYTES'),
      ('n','crypto_nP_SCALARBYTES','crypto_nP_SCALARBYTES+crypto_nP_POINTBYTES'),
      ('p','crypto_nP_POINTBYTES','crypto_nP_POINTBYTES'),
    ),
    (
      ('loops','64','512'),
    ),
    (
      ('',('q',),(),('n','p')),
    ),
  ),
  (
    'nPbatch',
    (
      ('q',None,'128*crypto_nPbatch_POINTBYTES'),
      ('n',None,'128*(crypto_nPbatch_SCALARBYTES+crypto_nPbatch_POINTBYTES)'),
      ('p',None,'128*crypto_nPbatch_POINTBYTES'),
    ),
    (
      ('loops','32','256'),
      ('maxtest','16','128'),
    ),
    (
      ('',('q',),(),('n','p','mlen')),
    ),
  ),
  (
    'nG',
    (
      ('q','crypto_nG_POINTBYTES','crypto_nG_POINTBYTES'),
      ('n','crypto_nG_SCALARBYTES','crypto_nG_SCALARBYTES+crypto_nG_POINTBYTES'),
    ),
    (
      ('loops','64','512'),
    ),
    (
      ('',('q',),(),('n',)),
    ),
  ),
  (
    'mGnP',
    (
      ('Q','crypto_mGnP_OUTPUTBYTES','crypto_mGnP_OUTPUTBYTES'),
      ('m','crypto_mGnP_MBYTES','crypto_mGnP_MBYTES+crypto_mGnP_OUTPUTBYTES'),
      ('n','crypto_mGnP_NBYTES','crypto_mGnP_NBYTES+crypto_mGnP_OUTPUTBYTES'),
      ('P','crypto_mGnP_PBYTES','crypto_mGnP_PBYTES+crypto_mGnP_OUTPUTBYTES'),
    ),
    (
      ('loops','128','1024'),
    ),
    (
      ('',('Q',),(),('m','n','P')),
    ),
  ),
  (
    'multiscalar',
    (
      ('Q','crypto_multiscalar_OUTPUTBYTES','crypto_multiscalar_OUTPUTBYTES'),
      ('n',None,'128*crypto_multiscalar_SCALARBYTES+crypto_multiscalar_OUTPUTBYTES'),
      ('P',None,'128*crypto_multiscalar_POINTBYTES+crypto_multiscalar_OUTPUTBYTES'),
    ),
    (
      ('loops','128','1024'),
      ('maxtest','16','128'),
    ),
    (
      ('',('Q',),(),('n','P','mlen')),
    ),
  ),
  (
    'dh',
    tuple((v,initsize,_dh_alloc) for v,initsize in (
      ('a','crypto_dh_SECRETKEYBYTES'),
      ('b','crypto_dh_SECRETKEYBYTES'),
      ('c','crypto_dh_PUBLICKEYBYTES'),
      ('d','crypto_dh_PUBLICKEYBYTES'),
      ('e','crypto_dh_BYTES'),
      ('f','crypto_dh_BYTES'),
    )),
    (
      ('loops','64','512'),
    ),
    (
      # two key pairs, then the shared secret computed from each side
      ('_keypair',('c','a'),(),()),
      ('_keypair',('d','b'),(),()),
      ('',('e',),(),('d','a')),
      ('',('f',),(),('c','b')),
    ),
  ),
  (
    'sign',
    tuple((v,initsize,_sign_alloc) for v,initsize in (
      ('p','crypto_sign_PUBLICKEYBYTES'),
      ('s','crypto_sign_SECRETKEYBYTES'),
      ('m',None),
      ('c',None),
      ('t',None),
    )),
    (
      ('loops','8','64'),
      ('maxtest','128','4096'),
    ),
    (
      ('_keypair',('p','s'),(),()),
      ('',('c','&clen'),(),('m','mlen','s')),
      ('_open',('t','&tlen'),(),('c','clen','p')),
    ),
  ),
)

# the helper strings are only needed while building the table
del _dh_alloc,_sign_alloc

# Generate one C test file per (operation o, primitive p) pair listed in todo.
# X accumulates the text of that file; it is written at the end of the inner
# loop to command/lib25519-test_{o}_{p}.c.
# NOTE: `vars` and `input` shadow Python builtins inside this loop, and `p`
# rebinds the module-level name p.
for t in todo:
  o,vars,howmuch,tests = t

  for p in primitives[o]:
    X = []

    # ----- file header: includes and the fail() macro (clears ok, then prints)
    X += [f'/* ----- {o}/{p}, derived from supercop/crypto_{o}/try.c */\n']
    X += ['\n']
    X += ['#include <stdio.h>\n']
    X += ['#include <stdlib.h>\n']
    X += ['#include <string.h>\n']
    X += ['#include <stdint.h>\n']
    X += ['#include <lib25519.h>\n']
    X += ['#include "lib25519_test.h"\n']
    X += ['\n']
    X += ['#define fail ((ok = 0),printf)\n']

    # expected checksums: index 0 for the small pass, index 1 for the big pass
    # (indexed by checksumbig in the generated code)
    X += ['static const char *%s_%s_checksums[] = {\n' % (o,p)]
    X += ['  "%s",\n' % checksums[o,p][0]]
    X += ['  "%s",\n' % checksums[o,p][1]]
    X += ['} ;\n']
    X += ['\n']

    # one function pointer per prototype of this operation;
    # fun is a list of name parts that is joined with '_' to form the C name
    for rettype,fun,args in prototypes[o]:
      X += ['static %s (*%s)(%s);\n' % (rettype,'_'.join(fun),args)]

    # map the generic size macros crypto_<o>_* onto the lib25519_<o>_<p>_* ones;
    # the macro name is the second whitespace-separated field of each line
    if (o,p) in sizes:
      for line in sizes[o,p].splitlines():
        psize = line.split()[1]
        size1 = psize.replace('crypto_%s_%s_'%(o,p),'crypto_%s_'%o)
        size2 = psize.replace('crypto_','lib25519_')
        X += ['#define %s %s\n' % (size1,size2)]
      X += ['\n']

    # per variable v: an allocation handle (storage_*) and a working pointer
    # (test_*), followed by the same pair for the second copy named v2
    for v,initsize,allocsize in vars:
      X += ['static void *storage_%s_%s_%s;\n' % (o,p,v)]
      X += ['static unsigned char *test_%s_%s_%s;\n' % (o,p,v)]
    for v,initsize,allocsize in vars:
      X += ['static void *storage_%s_%s_%s2;\n' % (o,p,v)]
      X += ['static unsigned char *test_%s_%s_%s2;\n' % (o,p,v)]
    X += ['\n']

    # ----- precomputed test vectors, emitted as C tables
    # precomputed[o,p] is a sequence of dicts keyed by variable name; the key
    # 'batch' holds a value printed as-is instead of a byte array
    if (o,p) in precomputed:
      X += ['#define precomputed_%s_%s_NUM %d\n' % (o,p,len(precomputed[o,p]))]
      X += ['\n']
      for pos,precomp in enumerate(precomputed[o,p]):
        for v in precomp:
          if v != 'batch':
            precompstr = ','.join(str(c) for c in precomp[v])
            X += ['static const unsigned char precomputed_%s_%s_%s_%s[] = {%s};\n' % (o,p,v,pos,precompstr)]

      X += ['\n']
      X += ['static const struct {\n']
      # the struct layout is taken from the first entry only (hence the break);
      # presumably every entry has the same keys in the same order
      for pos,precomp in enumerate(precomputed[o,p]):
        for v in precomp:
          if v == 'batch':
            X += ['  long long batch;\n']
          else:
            X += ['  const unsigned char *%s;\n' % v]
            X += ['  long long %ssize;\n' % v]
        break
      X += ['} precomputed_%s_%s[precomputed_%s_%s_NUM] = {\n' % (o,p,o,p)]
      for pos,precomp in enumerate(precomputed[o,p]):
        X += ['  {\n']
        for v in precomp:
          if v == 'batch':
            X += ['    %s,'%precomp[v]]
          else:
            X += ['    precomputed_%s_%s_%s_%s,%d,'%(o,p,v,pos,len(precomp[v]))]
          X += ['\n']
        X += ['  },\n']
      X += ['} ;\n']
      X += ['\n']

    # ----- test_<o>_<p>_impl: runs all tests against one implementation
    X += ['static void test_%s_%s_impl(long long impl)\n' % (o,p)]
    X += ['{\n']
    for v,initsize,allocsize in vars:
      X += ['  unsigned char *%s = test_%s_%s_%s;\n' % (v,o,p,v)]
    for v,initsize,allocsize in vars:
      X += ['  unsigned char *%s2 = test_%s_%s_%s2;\n' % (v,o,p,v)]
    # each variable gets a <v>len; variables with initsize None are declared
    # uninitialized and assigned inside the test loop.
    # mlendefined records whether a variable named 'm' already provides mlen
    mlendefined = False
    for v,initsize,allocsize in vars:
      if initsize is None:
        X += ['  long long %slen;\n' % v]
      else:
        X += ['  long long %slen = %s;\n' % (v,initsize)]
      if v == 'm': mlendefined = True
    X += ['\n']

    # command-line filters on implementation name and implementation number
    X += ['  if (targeti && strcmp(targeti,".") && strcmp(targeti,lib25519_dispatch_%s_%s_implementation(impl))) return;\n' % (o,p)]
    X += ['  if (targetn && atol(targetn) != impl) return;\n'] # XXX: atoll is slightly unportable

    # impl >= 0 selects a specific implementation through the dispatch API;
    # otherwise the library's default entry points are used
    X += ['  if (impl >= 0) {\n']
    for rettype,fun,args in prototypes[o]:
      f2 = ['lib25519','dispatch',o,p]+fun[2:]
      X += ['    %s = %s(impl);\n' % ('_'.join(fun),'_'.join(f2))]
    X += ['    printf("%s_%s %%lld implementation %%s compiler %%s\\n",impl,lib25519_dispatch_%s_%s_implementation(impl),lib25519_dispatch_%s_%s_compiler(impl));\n' % (o,p,o,p,o,p)]
    X += ['  } else {\n']
    for rettype,fun,args in prototypes[o]:
      f2 = ['lib25519',o,p]+fun[2:]
      X += ['    %s = %s;\n' % ('_'.join(fun),'_'.join(f2))]
    X += ['    printf("%s_%s selected implementation %%s compiler %%s\\n",lib25519_%s_%s_implementation(),lib25519_%s_%s_compiler());\n' % (o,p,o,p,o,p)]
    X += ['  }\n']

    # two passes: checksumbig == 0 uses the small howmuch values, 1 the big ones
    X += ['  for (long long checksumbig = 0;checksumbig < 2;++checksumbig) {\n']

    maxtestdefined = False
    for v,small,big in howmuch:
      X += ['    long long %s = checksumbig ? %s : %s;\n' % (v,big,small)]
      if v == 'maxtest': maxtestdefined = True
    # operations with a maxtest but no variable 'm' still need an mlen
    if maxtestdefined and not mlendefined:
      X += ['    long long mlen;\n']
    X += ['\n']
    X += ['    checksum_clear();\n']
    X += ['\n']
    X += ['    for (long long loop = 0;loop < loops;++loop) {\n']

    # declare `result` only if at least one tested function returns a value
    wantresult = False
    for f,output,inout,input in tests:
      cof = 'crypto_'+o+f
      for rettype,fun,args in prototypes[o]:
        if cof == '_'.join(fun):
          if rettype != 'void':
            wantresult = True
    if wantresult:
      X += ['      int result;\n']

    if maxtestdefined:
      X += ['      mlen = myrandom() % (maxtest + 1);\n']
    X += ['\n']

    # `initialized` tracks variables that already hold data from an earlier
    # test in this loop body, so later tests reuse it instead of fresh input
    initialized = set()
    for f,output,inout,input in tests:
      cof = 'crypto_'+o+f

      # return type of the function under test (None if no prototype matches)
      cofrettype = None
      for rettype,fun,args in prototypes[o]:
        if cof == '_'.join(fun):
          cofrettype = rettype

      # expected return value and the wording used when it differs
      expected = '0'
      unexpected = 'nonzero'
      if cof == 'crypto_hashblocks':
        expected = 'mlen % crypto_hashblocks_BLOCKBYTES'
        unexpected = 'unexpected value'

      # lengths that depend on mlen/clen are set just before the call
      if cof == 'crypto_sign':
        X += ['      clen = mlen + %s_BYTES;\n' % cof]
      if cof == 'crypto_sign_open':
        X += ['      tlen = clen;\n']
      if cof == 'crypto_powbatch':
        X += ['      qlen = mlen * %s_BYTES;\n' % cof]
        X += ['      plen = mlen * %s_BYTES;\n' % cof]
      if cof == 'crypto_nPbatch':
        X += ['      qlen = mlen * %s_POINTBYTES;\n' % cof]
        X += ['      nlen = mlen * %s_SCALARBYTES;\n' % cof]
        X += ['      plen = mlen * %s_POINTBYTES;\n' % cof]
      if cof == 'crypto_multiscalar':
        X += ['      nlen = mlen * %s_SCALARBYTES;\n' % cof]
        X += ['      Plen = mlen * %s_POINTBYTES;\n' % cof]

      # len(v) == 1 selects buffer variables; longer names such as 'mlen' or
      # '&clen' are length arguments and are only passed through
      for v in output:
        if len(v) == 1:
          X += ['      output_prepare(%s2,%s,%slen);\n' % (v,v,v)]
          # v now has CDE where C is canary, D is canary, E is canary
          # v2 now has same CDE
          # D is at start of v with specified length
          # C is 16 bytes before beginning
          # E is 16 bytes past end
      for v in input+inout:
        if len(v) == 1:
          if v in initialized:
            X += ['      memcpy(%s2,%s,%slen);\n' % (v,v,v)]
            X += ['      double_canary(%s2,%s,%slen);\n' % (v,v,v)]
          else:
            X += ['      input_prepare(%s2,%s,%slen);\n' % (v,v,v)]
            # v now has CTE where C is canary, T is test data, E is canary
            # v2 has same CTE
            initialized.add(v)

      # inputs are marked secret before the call, except for the operations
      # listed here, whose inputs are marked public
      for v in input+inout:
        if len(v) == 1:
          if cof in ('crypto_mGnP','crypto_multiscalar','crypto_sign_open'):
            X += ['      public(%s,%slen);\n' % (v,v)]
          else:
            X += ['      secret(%s,%slen);\n' % (v,v)]

      # first call: on the primary buffers
      args = ','.join(output+inout+input)
      if cofrettype == 'void':
        X += ['      %s(%s);\n' % (cof,args)]
      else:
        X += ['      result = %s(%s);\n' % (cof,args)]
        X += ['      public(&result,sizeof result);\n']
        X += ['      if (result != %s) fail("failure: %s returns %s\\n");\n' % (expected,cof,unexpected)]

      # everything is marked public again before it is inspected
      for v in input+inout+output:
        if len(v) == 1:
          X += ['      public(%s,%slen);\n' % (v,v)]

      if cof == 'crypto_sign':
        extrabytes = cof+'_BYTES'
        X += ['      if (clen < mlen) fail("failure: %s returns smaller output than input\\n");\n' % cof]
        X += ['      if (clen > mlen + %s) fail("failure: %s returns more than %s extra bytes\\n");\n' % (extrabytes,cof,extrabytes)]
      if cof == 'crypto_sign_open':
        X += ['      if (tlen != mlen) fail("failure: %s does not match mlen\\n");\n' % cof]
        X += ['      if (memcmp(t,m,mlen) != 0) fail("failure: %s does not match m\\n");\n' % cof]

      for v in output+inout:
        if len(v) == 1:
          X += ['      checksum(%s,%slen);\n' % (v,v)]
          # output v,v2 now has COE,CDE where O is output; checksum O
          initialized.add(v)
      for v in output+inout:
        if len(v) == 1:
          # for crypto_sign_open the buffer t was prepared with tlen = clen,
          # so its canaries are compared using clen
          if cof == 'crypto_sign_open' and v == 't':
            X += ['      output_compare(%s2,%s,%slen,"%s");\n' % (v,v,'c',cof)]
          else:
            X += ['      output_compare(%s2,%s,%slen,"%s");\n' % (v,v,v,cof)]
            # output_compare checks COE,CDE for equal C, equal E
      for v in input:
        if len(v) == 1:
          X += ['      input_compare(%s2,%s,%slen,"%s");\n' % (v,v,v,cof)]
          # input_compare checks CTE,CTE for equal C, equal T, equal E

      # second call on the v2 copies, skipped for functions without inputs
      # (key generation) and for crypto_sign
      deterministic = True
      if inout+input == (): deterministic = False
      if cof == 'crypto_sign': deterministic = False

      if deterministic:
        X += ['\n']
        for v in output+inout+input:
          if len(v) == 1:
            X += ['      double_canary(%s2,%s,%slen);\n' % (v,v,v)]
            # old output v,v2: COE,CDE; new v,v2: FOG,FDG where F,G are new canaries
            # old inout v,v2: COE,CTE; new v,v2: FOG,FTG
            # old input v,v2: CTE,CTE; new v,v2: FTG,FTG

        for v in input+inout:
          if len(v) == 1:
            if cof in ('crypto_mGnP','crypto_multiscalar','crypto_sign_open'):
              X += ['      public(%s2,%slen);\n' % (v,v)]
            else:
              X += ['      secret(%s2,%slen);\n' % (v,v)]

        # same argument list with every buffer replaced by its copy;
        # arguments whose name ends in 'len' are passed unchanged
        args = ','.join([v if v[-3:] == 'len' else v+'2' for v in output+inout+input])
        if cofrettype == 'void':
          X += ['      %s(%s);\n' % (cof,args)]
        else:
          X += ['      result = %s(%s);\n' % (cof,args)]
          X += ['      public(&result,sizeof result);\n']
          X += ['      if (result != %s) fail("failure: %s returns %s\\n");\n' % (expected,cof,unexpected)]

        for v in input+inout+output:
          if len(v) == 1:
            X += ['      public(%s2,%slen);\n' % (v,v)]

        for w in output + inout:
          if len(w) == 1:
            # w,w2: COE,COE; goal now is to compare O
            X += ['      if (memcmp(%s2,%s,%slen) != 0) fail("failure: %s is nondeterministic\\n");\n' % (w,w,w,cof)]

      # overlap tests run only for deterministic functions without in/out arguments
      overlap = deterministic
      if inout != (): overlap = False

      # XXX: overlap test assumes that inputs are at least as big as outputs

      if overlap:
        # for every (output y, input x) pair: call with x2 used both as the
        # input x and as the output y, then compare x2 with the earlier y
        for y in output:
          if len(y) == 1:
            X += ['\n']
            for v in output:
              if len(v) == 1:
                X += ['      double_canary(%s2,%s,%slen);\n' % (v,v,v)]
            for v in input:
              if len(v) == 1:
                X += ['      double_canary(%s2,%s,%slen);\n' % (v,v,v)]
            for x in input:
              if len(x) == 1:
                # try writing to x2 instead of y, while reading x2
                args = ','.join([x+'2' if v==y else v for v in output] + [x+'2' if v==x else v for v in input])

                for v in input+inout:
                  v2 = x+'2' if v==x else v
                  if len(v) == 1:
                    if cof in ('crypto_mGnP','crypto_multiscalar','crypto_sign_open'):
                      X += ['      public(%s,%slen);\n' % (v2,v)]
                    else:
                      X += ['      secret(%s,%slen);\n' % (v2,v)]

                if cofrettype == 'void':
                  X += ['      %s(%s);\n' % (cof,args)]
                else:
                  X += ['      result = %s(%s);\n' % (cof,args)]
                  X += ['      public(&result,sizeof result);\n']
                  X += ['      if (result != %s) fail("failure: %s with %s=%s overlap returns %s\\n");\n' % (expected,cof,x,y,unexpected)]

                for v in output:
                  v2 = x+'2' if v==y else v
                  if len(v) == 1:
                    X += ['      public(%s,%slen);\n' % (v2,v)]
                for v in input:
                  if v == x: continue
                  if len(v) == 1:
                    X += ['      public(%s,%slen);\n' % (v,v)]

                X += ['      if (memcmp(%s2,%s,%slen) != 0) fail("failure: %s does not handle %s=%s overlap\\n");\n' % (x,y,y,cof,x,y)]
                # restore x2 from x for the next pair
                X += ['      memcpy(%s2,%s,%slen);\n' % (x,x,x)]

      # forgery check: modify one byte of the signed message three times;
      # each time, if open still succeeds it must return the original message
      if cof == 'crypto_sign_open':
        X += ['\n']
        for tweaks in range(3):
          X += ['      c[myrandom() % clen] += 1 + (myrandom() % 255);\n']
          X += ['      if (%s(t,&tlen,c,clen,p) == 0)\n' % cof]
          X += ['        if ((tlen != mlen) || (memcmp(t,m,mlen) != 0))\n']
          X += ['          fail("failure: %s allows trivial forgeries\\n");\n' % cof]

      # after the second dh computation: both shared secrets must be equal
      if cof == 'crypto_dh' and output == ('f',):
        X += ['\n']
        X += ['      if (memcmp(f,e,elen) != 0) fail("failure: %s not associative\\n");\n' % cof]

    X += ['    }\n']
    X += ['    checksum_expected(%s_%s_checksums[checksumbig]);\n' % (o,p)]
    X += ['  }\n']

    # ----- test vectors computed by python

    # for each tested function: load every precomputed entry into the
    # buffers, call the function once, and compare outputs with the table
    for f,output,inout,input in tests:
      cof = 'crypto_'+o+f
      if (o,p) in precomputed:
        X += ['  for (long long precomp = 0;precomp < precomputed_%s_%s_NUM;++precomp) {\n' % (o,p)]
        if maxtestdefined and not mlendefined:
          X += ['    long long mlen = precomputed_%s_%s[precomp].batch;\n' % (o,p)]
        for v,initsize,allocsize in vars:
          if v in output:
            X += ['    output_prepare(%s2,%s,precomputed_%s_%s[precomp].%ssize);\n' % (v,v,o,p,v)]
          if v in input+inout:
            X += ['    input_prepare(%s2,%s,precomputed_%s_%s[precomp].%ssize);\n' % (v,v,o,p,v)]
            X += ['    memcpy(%s,precomputed_%s_%s[precomp].%s,precomputed_%s_%s[precomp].%ssize);\n' % (v,o,p,v,o,p,v)]
            X += ['    memcpy(%s2,precomputed_%s_%s[precomp].%s,precomputed_%s_%s[precomp].%ssize);\n' % (v,o,p,v,o,p,v)]

        args = ','.join(output+inout+input)
        X += ['    %s(%s);\n' % (cof,args)]

        # on mismatch, print expected and received bytes in hex
        for v,initsize,allocsize in vars:
          if v in output+inout:
            X += ['    if (memcmp(%s,precomputed_%s_%s[precomp].%s,precomputed_%s_%s[precomp].%ssize)) {\n' % (v,o,p,v,o,p,v)]
            X += ['      fail("failure: %s fails precomputed test vectors\\n");\n' % cof]
            X += ['      printf("expected %s: ");\n' % v]
            X += ['      for (long long pos = 0;pos < precomputed_%s_%s[precomp].%ssize;++pos) printf("%%02x",precomputed_%s_%s[precomp].%s[pos]);\n' % (o,p,v,o,p,v)]
            X += ['      printf("\\n");\n']
            X += ['      printf("received %s: ");\n' % v]
            X += ['      for (long long pos = 0;pos < precomputed_%s_%s[precomp].%ssize;++pos) printf("%%02x",%s[pos]);\n' % (o,p,v,v)]
            X += ['      printf("\\n");\n']
            X += ['    }\n']

        for v,initsize,allocsize in vars:
          if v in output+inout:
            X += ['    output_compare(%s2,%s,precomputed_%s_%s[precomp].%ssize,"%s");\n' % (v,v,o,p,v,cof)]
          if v in input:
            X += ['    input_compare(%s2,%s,precomputed_%s_%s[precomp].%ssize,"%s");\n' % (v,v,o,p,v,cof)]

        X += ['  }\n']

    X += ['}\n']
    X += ['\n']

    # ----- test_<o>_<p>: exported entry point called from the main program
    X += ['void test_%s_%s(void)\n' % (o,p)]
    X += ['{\n']
    X += ['  long long maxalloc = 0;\n']
    X += ['  if (targeto && strcmp(targeto,"%s")) return;\n' % o]
    X += ['  if (targetp && strcmp(targetp,"%s")) return;\n' % p]

    # primary buffers are allocated with their own size; the v2 copies are all
    # allocated with maxalloc, the largest allocation size among the variables
    for v,initsize,allocsize in vars:
      X += ['  storage_%s_%s_%s = callocplus(%s);\n' % (o,p,v,allocsize)]
      X += ['  test_%s_%s_%s = aligned(storage_%s_%s_%s,%s);\n' % (o,p,v,o,p,v,allocsize)]
      X += [f'  if ({allocsize} > maxalloc) maxalloc = {allocsize};\n']
    for v,initsize,allocsize in vars:
      X += ['  storage_%s_%s_%s2 = callocplus(maxalloc);\n' % (o,p,v)]
      X += ['  test_%s_%s_%s2 = aligned(storage_%s_%s_%s2,%s);\n' % (o,p,v,o,p,v,allocsize)]
    X += ['\n']

    # run every implementation (impl == -1 is the default one) at offset 0 and,
    # unless running under valgrind, again with all pointers advanced by one byte
    X += ['  for (long long offset = 0;offset < 2;++offset) {\n']
    X += ['    if (targetoffset && atol(targetoffset) != offset) continue;\n']
    X += ['    if (offset && valgrind) break;\n']
    X += ['    printf("%s_%s offset %%lld\\n",offset);\n' % (o,p)]
    X += ['    for (long long impl = -1;impl < lib25519_numimpl_%s_%s();++impl)\n' % (o,p)]
    X += ['      forked(test_%s_%s_impl,impl);\n' % (o,p)]
    for v,initsize,allocsize in vars:
      X += ['    ++test_%s_%s_%s;\n' % (o,p,v)]
    for v,initsize,allocsize in vars:
      X += ['    ++test_%s_%s_%s2;\n' % (o,p,v)]

    X += ['  }\n']

    # free through the storage_* handles, in reverse order of allocation
    for v,initsize,allocsize in reversed(vars):
      X += ['  free(storage_%s_%s_%s2);\n' % (o,p,v)]
    for v,initsize,allocsize in reversed(vars):
      X += ['  free(storage_%s_%s_%s);\n' % (o,p,v)]

    X += ['}\n']

    # undo the size macro definitions made near the top of the file
    if (o,p) in sizes:
      for line in sizes[o,p].splitlines():
        psize = line.split()[1]
        size1 = psize.replace('crypto_%s_%s_'%(o,p),'crypto_%s_'%o)
        X += ['#undef %s\n' % size1]
      X += ['\n']

    # save() rewrites the file only if its content changed
    save(f'command/lib25519-test_{o}_{p}.c',''.join(X))


# Start of main() in the main test program: print version, architecture and
# CPU information, report valgrind status, then read up to five positional
# arguments into targeto, targetp, targeti, targetn and targetoffset.
Z.append(r'''/* ----- top level */

#include "print_cpuid.inc"

int main(int argc,char **argv)
{
  valgrind_init();
  if (valgrind) limits();

  setvbuf(stdout,0,_IOLBF,0);
  printf("lib25519 version %s\n",lib25519_version);
  printf("lib25519 arch %s\n",lib25519_arch);
  print_cpuid();

  if (valgrind) {
    printf("valgrind %d",(int) valgrind);
    printf(" declassify %d",(int) crypto_declassify_uses_valgrind);
    if (!crypto_declassify_uses_valgrind)
      printf(" (expect false positives)");
    printf("\n");
  }

  if (*argv) ++argv;
  if (*argv) {
    targeto = *argv++;
    if (*argv) {
      targetp = *argv++;
      if (*argv) {
        targeti = *argv++;
        if (*argv) {
          targetn = *argv++;
          if (*argv) {
            targetoffset = *argv++;
          }
        }
      }
    }
  }

''')

# one call per verify primitive, matching the static test_verify_<p>
# functions emitted earlier into the same file
for p in primitives['verify']:
  Z.append(f'  test_verify_{p}();\n')

# Hook every generated per-primitive test into the main program:
#   Z gets the call inside main();
#   H (the shared header lib25519_test.h) gets a macro that gives the entry
#   point a lib25519_test_ prefix, plus its extern declaration.
# The prefixed symbol keeps the '_' between operation and primitive, like
# every other generated identifier and file name (test_{o}_{p},
# lib25519-test_{o}_{p}.c). Concatenating them directly would be ambiguous:
# different (o,p) pairs could yield the same symbol.
# Only the first field of each todo entry is used here.
for t in todo:
  o,vars,howmuch,tests = t
  for p in primitives[o]:
    Z += ['  test_%s_%s();\n' % (o,p)]
    H += [f'#define test_{o}_{p} lib25519_test_{o}_{p}\n']
    H += [f'extern void test_{o}_{p}(void);\n']

# End of main(): report the overall result, exiting with status 100 if ok
# is zero and 0 otherwise.
Z.append(r'''
  if (!ok) {
    printf("some tests failed\n");
    return 100;
  }
  printf("all tests succeeded\n");
  return 0;
}
''')

# close the conditional opened earlier in the header text
H.append('#endif\n')

# write the shared header and the main test program; save() leaves a file
# untouched when its content is already identical
save('command/lib25519_test.h',''.join(H))
save('command/lib25519-test.c',''.join(Z))
