1*1e124260SDamien Miller /* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */
2*1e124260SDamien Miller /*
3*1e124260SDamien Miller version 20081011
4*1e124260SDamien Miller Matthew Dempsky
5*1e124260SDamien Miller Public domain.
6*1e124260SDamien Miller Derived from public domain code by D. J. Bernstein.
7*1e124260SDamien Miller */
8*1e124260SDamien Miller 
9*1e124260SDamien Miller int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
10*1e124260SDamien Miller 
add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])11*1e124260SDamien Miller static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
12*1e124260SDamien Miller {
13*1e124260SDamien Miller   unsigned int j;
14*1e124260SDamien Miller   unsigned int u;
15*1e124260SDamien Miller   u = 0;
16*1e124260SDamien Miller   for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
17*1e124260SDamien Miller   u += a[31] + b[31]; out[31] = u;
18*1e124260SDamien Miller }
19*1e124260SDamien Miller 
sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])20*1e124260SDamien Miller static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
21*1e124260SDamien Miller {
22*1e124260SDamien Miller   unsigned int j;
23*1e124260SDamien Miller   unsigned int u;
24*1e124260SDamien Miller   u = 218;
25*1e124260SDamien Miller   for (j = 0;j < 31;++j) {
26*1e124260SDamien Miller     u += a[j] + 65280 - b[j];
27*1e124260SDamien Miller     out[j] = u & 255;
28*1e124260SDamien Miller     u >>= 8;
29*1e124260SDamien Miller   }
30*1e124260SDamien Miller   u += a[31] - b[31];
31*1e124260SDamien Miller   out[31] = u;
32*1e124260SDamien Miller }
33*1e124260SDamien Miller 
squeeze(unsigned int a[32])34*1e124260SDamien Miller static void squeeze(unsigned int a[32])
35*1e124260SDamien Miller {
36*1e124260SDamien Miller   unsigned int j;
37*1e124260SDamien Miller   unsigned int u;
38*1e124260SDamien Miller   u = 0;
39*1e124260SDamien Miller   for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
40*1e124260SDamien Miller   u += a[31]; a[31] = u & 127;
41*1e124260SDamien Miller   u = 19 * (u >> 7);
42*1e124260SDamien Miller   for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
43*1e124260SDamien Miller   u += a[31]; a[31] = u;
44*1e124260SDamien Miller }
45*1e124260SDamien Miller 
46*1e124260SDamien Miller static const unsigned int minusp[32] = {
47*1e124260SDamien Miller  19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
48*1e124260SDamien Miller } ;
49*1e124260SDamien Miller 
freeze(unsigned int a[32])50*1e124260SDamien Miller static void freeze(unsigned int a[32])
51*1e124260SDamien Miller {
52*1e124260SDamien Miller   unsigned int aorig[32];
53*1e124260SDamien Miller   unsigned int j;
54*1e124260SDamien Miller   unsigned int negative;
55*1e124260SDamien Miller 
56*1e124260SDamien Miller   for (j = 0;j < 32;++j) aorig[j] = a[j];
57*1e124260SDamien Miller   add(a,a,minusp);
58*1e124260SDamien Miller   negative = -((a[31] >> 7) & 1);
59*1e124260SDamien Miller   for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
60*1e124260SDamien Miller }
61*1e124260SDamien Miller 
mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])62*1e124260SDamien Miller static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
63*1e124260SDamien Miller {
64*1e124260SDamien Miller   unsigned int i;
65*1e124260SDamien Miller   unsigned int j;
66*1e124260SDamien Miller   unsigned int u;
67*1e124260SDamien Miller 
68*1e124260SDamien Miller   for (i = 0;i < 32;++i) {
69*1e124260SDamien Miller     u = 0;
70*1e124260SDamien Miller     for (j = 0;j <= i;++j) u += a[j] * b[i - j];
71*1e124260SDamien Miller     for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
72*1e124260SDamien Miller     out[i] = u;
73*1e124260SDamien Miller   }
74*1e124260SDamien Miller   squeeze(out);
75*1e124260SDamien Miller }
76*1e124260SDamien Miller 
mult121665(unsigned int out[32],const unsigned int a[32])77*1e124260SDamien Miller static void mult121665(unsigned int out[32],const unsigned int a[32])
78*1e124260SDamien Miller {
79*1e124260SDamien Miller   unsigned int j;
80*1e124260SDamien Miller   unsigned int u;
81*1e124260SDamien Miller 
82*1e124260SDamien Miller   u = 0;
83*1e124260SDamien Miller   for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
84*1e124260SDamien Miller   u += 121665 * a[31]; out[31] = u & 127;
85*1e124260SDamien Miller   u = 19 * (u >> 7);
86*1e124260SDamien Miller   for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
87*1e124260SDamien Miller   u += out[j]; out[j] = u;
88*1e124260SDamien Miller }
89*1e124260SDamien Miller 
square(unsigned int out[32],const unsigned int a[32])90*1e124260SDamien Miller static void square(unsigned int out[32],const unsigned int a[32])
91*1e124260SDamien Miller {
92*1e124260SDamien Miller   unsigned int i;
93*1e124260SDamien Miller   unsigned int j;
94*1e124260SDamien Miller   unsigned int u;
95*1e124260SDamien Miller 
96*1e124260SDamien Miller   for (i = 0;i < 32;++i) {
97*1e124260SDamien Miller     u = 0;
98*1e124260SDamien Miller     for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
99*1e124260SDamien Miller     for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
100*1e124260SDamien Miller     u *= 2;
101*1e124260SDamien Miller     if ((i & 1) == 0) {
102*1e124260SDamien Miller       u += a[i / 2] * a[i / 2];
103*1e124260SDamien Miller       u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
104*1e124260SDamien Miller     }
105*1e124260SDamien Miller     out[i] = u;
106*1e124260SDamien Miller   }
107*1e124260SDamien Miller   squeeze(out);
108*1e124260SDamien Miller }
109*1e124260SDamien Miller 
select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)110*1e124260SDamien Miller static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
111*1e124260SDamien Miller {
112*1e124260SDamien Miller   unsigned int j;
113*1e124260SDamien Miller   unsigned int t;
114*1e124260SDamien Miller   unsigned int bminus1;
115*1e124260SDamien Miller 
116*1e124260SDamien Miller   bminus1 = b - 1;
117*1e124260SDamien Miller   for (j = 0;j < 64;++j) {
118*1e124260SDamien Miller     t = bminus1 & (r[j] ^ s[j]);
119*1e124260SDamien Miller     p[j] = s[j] ^ t;
120*1e124260SDamien Miller     q[j] = r[j] ^ t;
121*1e124260SDamien Miller   }
122*1e124260SDamien Miller }
123*1e124260SDamien Miller 
mainloop(unsigned int work[64],const unsigned char e[32])124*1e124260SDamien Miller static void mainloop(unsigned int work[64],const unsigned char e[32])
125*1e124260SDamien Miller {
126*1e124260SDamien Miller   unsigned int xzm1[64];
127*1e124260SDamien Miller   unsigned int xzm[64];
128*1e124260SDamien Miller   unsigned int xzmb[64];
129*1e124260SDamien Miller   unsigned int xzm1b[64];
130*1e124260SDamien Miller   unsigned int xznb[64];
131*1e124260SDamien Miller   unsigned int xzn1b[64];
132*1e124260SDamien Miller   unsigned int a0[64];
133*1e124260SDamien Miller   unsigned int a1[64];
134*1e124260SDamien Miller   unsigned int b0[64];
135*1e124260SDamien Miller   unsigned int b1[64];
136*1e124260SDamien Miller   unsigned int c1[64];
137*1e124260SDamien Miller   unsigned int r[32];
138*1e124260SDamien Miller   unsigned int s[32];
139*1e124260SDamien Miller   unsigned int t[32];
140*1e124260SDamien Miller   unsigned int u[32];
141*1e124260SDamien Miller   unsigned int j;
142*1e124260SDamien Miller   unsigned int b;
143*1e124260SDamien Miller   int pos;
144*1e124260SDamien Miller 
145*1e124260SDamien Miller   for (j = 0;j < 32;++j) xzm1[j] = work[j];
146*1e124260SDamien Miller   xzm1[32] = 1;
147*1e124260SDamien Miller   for (j = 33;j < 64;++j) xzm1[j] = 0;
148*1e124260SDamien Miller 
149*1e124260SDamien Miller   xzm[0] = 1;
150*1e124260SDamien Miller   for (j = 1;j < 64;++j) xzm[j] = 0;
151*1e124260SDamien Miller 
152*1e124260SDamien Miller   for (pos = 254;pos >= 0;--pos) {
153*1e124260SDamien Miller     b = e[pos / 8] >> (pos & 7);
154*1e124260SDamien Miller     b &= 1;
155*1e124260SDamien Miller     select(xzmb,xzm1b,xzm,xzm1,b);
156*1e124260SDamien Miller     add(a0,xzmb,xzmb + 32);
157*1e124260SDamien Miller     sub(a0 + 32,xzmb,xzmb + 32);
158*1e124260SDamien Miller     add(a1,xzm1b,xzm1b + 32);
159*1e124260SDamien Miller     sub(a1 + 32,xzm1b,xzm1b + 32);
160*1e124260SDamien Miller     square(b0,a0);
161*1e124260SDamien Miller     square(b0 + 32,a0 + 32);
162*1e124260SDamien Miller     mult(b1,a1,a0 + 32);
163*1e124260SDamien Miller     mult(b1 + 32,a1 + 32,a0);
164*1e124260SDamien Miller     add(c1,b1,b1 + 32);
165*1e124260SDamien Miller     sub(c1 + 32,b1,b1 + 32);
166*1e124260SDamien Miller     square(r,c1 + 32);
167*1e124260SDamien Miller     sub(s,b0,b0 + 32);
168*1e124260SDamien Miller     mult121665(t,s);
169*1e124260SDamien Miller     add(u,t,b0);
170*1e124260SDamien Miller     mult(xznb,b0,b0 + 32);
171*1e124260SDamien Miller     mult(xznb + 32,s,u);
172*1e124260SDamien Miller     square(xzn1b,c1);
173*1e124260SDamien Miller     mult(xzn1b + 32,r,work);
174*1e124260SDamien Miller     select(xzm,xzm1,xznb,xzn1b,b);
175*1e124260SDamien Miller   }
176*1e124260SDamien Miller 
177*1e124260SDamien Miller   for (j = 0;j < 64;++j) work[j] = xzm[j];
178*1e124260SDamien Miller }
179*1e124260SDamien Miller 
recip(unsigned int out[32],const unsigned int z[32])180*1e124260SDamien Miller static void recip(unsigned int out[32],const unsigned int z[32])
181*1e124260SDamien Miller {
182*1e124260SDamien Miller   unsigned int z2[32];
183*1e124260SDamien Miller   unsigned int z9[32];
184*1e124260SDamien Miller   unsigned int z11[32];
185*1e124260SDamien Miller   unsigned int z2_5_0[32];
186*1e124260SDamien Miller   unsigned int z2_10_0[32];
187*1e124260SDamien Miller   unsigned int z2_20_0[32];
188*1e124260SDamien Miller   unsigned int z2_50_0[32];
189*1e124260SDamien Miller   unsigned int z2_100_0[32];
190*1e124260SDamien Miller   unsigned int t0[32];
191*1e124260SDamien Miller   unsigned int t1[32];
192*1e124260SDamien Miller   int i;
193*1e124260SDamien Miller 
194*1e124260SDamien Miller   /* 2 */ square(z2,z);
195*1e124260SDamien Miller   /* 4 */ square(t1,z2);
196*1e124260SDamien Miller   /* 8 */ square(t0,t1);
197*1e124260SDamien Miller   /* 9 */ mult(z9,t0,z);
198*1e124260SDamien Miller   /* 11 */ mult(z11,z9,z2);
199*1e124260SDamien Miller   /* 22 */ square(t0,z11);
200*1e124260SDamien Miller   /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
201*1e124260SDamien Miller 
202*1e124260SDamien Miller   /* 2^6 - 2^1 */ square(t0,z2_5_0);
203*1e124260SDamien Miller   /* 2^7 - 2^2 */ square(t1,t0);
204*1e124260SDamien Miller   /* 2^8 - 2^3 */ square(t0,t1);
205*1e124260SDamien Miller   /* 2^9 - 2^4 */ square(t1,t0);
206*1e124260SDamien Miller   /* 2^10 - 2^5 */ square(t0,t1);
207*1e124260SDamien Miller   /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
208*1e124260SDamien Miller 
209*1e124260SDamien Miller   /* 2^11 - 2^1 */ square(t0,z2_10_0);
210*1e124260SDamien Miller   /* 2^12 - 2^2 */ square(t1,t0);
211*1e124260SDamien Miller   /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
212*1e124260SDamien Miller   /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
213*1e124260SDamien Miller 
214*1e124260SDamien Miller   /* 2^21 - 2^1 */ square(t0,z2_20_0);
215*1e124260SDamien Miller   /* 2^22 - 2^2 */ square(t1,t0);
216*1e124260SDamien Miller   /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
217*1e124260SDamien Miller   /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
218*1e124260SDamien Miller 
219*1e124260SDamien Miller   /* 2^41 - 2^1 */ square(t1,t0);
220*1e124260SDamien Miller   /* 2^42 - 2^2 */ square(t0,t1);
221*1e124260SDamien Miller   /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
222*1e124260SDamien Miller   /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
223*1e124260SDamien Miller 
224*1e124260SDamien Miller   /* 2^51 - 2^1 */ square(t0,z2_50_0);
225*1e124260SDamien Miller   /* 2^52 - 2^2 */ square(t1,t0);
226*1e124260SDamien Miller   /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
227*1e124260SDamien Miller   /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
228*1e124260SDamien Miller 
229*1e124260SDamien Miller   /* 2^101 - 2^1 */ square(t1,z2_100_0);
230*1e124260SDamien Miller   /* 2^102 - 2^2 */ square(t0,t1);
231*1e124260SDamien Miller   /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
232*1e124260SDamien Miller   /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
233*1e124260SDamien Miller 
234*1e124260SDamien Miller   /* 2^201 - 2^1 */ square(t0,t1);
235*1e124260SDamien Miller   /* 2^202 - 2^2 */ square(t1,t0);
236*1e124260SDamien Miller   /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
237*1e124260SDamien Miller   /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
238*1e124260SDamien Miller 
239*1e124260SDamien Miller   /* 2^251 - 2^1 */ square(t1,t0);
240*1e124260SDamien Miller   /* 2^252 - 2^2 */ square(t0,t1);
241*1e124260SDamien Miller   /* 2^253 - 2^3 */ square(t1,t0);
242*1e124260SDamien Miller   /* 2^254 - 2^4 */ square(t0,t1);
243*1e124260SDamien Miller   /* 2^255 - 2^5 */ square(t1,t0);
244*1e124260SDamien Miller   /* 2^255 - 21 */ mult(out,t1,z11);
245*1e124260SDamien Miller }
246*1e124260SDamien Miller 
crypto_scalarmult_curve25519(unsigned char * q,const unsigned char * n,const unsigned char * p)247*1e124260SDamien Miller int crypto_scalarmult_curve25519(unsigned char *q,
248*1e124260SDamien Miller   const unsigned char *n,
249*1e124260SDamien Miller   const unsigned char *p)
250*1e124260SDamien Miller {
251*1e124260SDamien Miller   unsigned int work[96];
252*1e124260SDamien Miller   unsigned char e[32];
253*1e124260SDamien Miller   unsigned int i;
254*1e124260SDamien Miller   for (i = 0;i < 32;++i) e[i] = n[i];
255*1e124260SDamien Miller   e[0] &= 248;
256*1e124260SDamien Miller   e[31] &= 127;
257*1e124260SDamien Miller   e[31] |= 64;
258*1e124260SDamien Miller   for (i = 0;i < 32;++i) work[i] = p[i];
259*1e124260SDamien Miller   mainloop(work,e);
260*1e124260SDamien Miller   recip(work + 32,work + 32);
261*1e124260SDamien Miller   mult(work + 64,work,work + 32);
262*1e124260SDamien Miller   freeze(work + 64);
263*1e124260SDamien Miller   for (i = 0;i < 32;++i) q[i] = work[64 + i];
264*1e124260SDamien Miller   return 0;
265*1e124260SDamien Miller }
266