FORM 4.3
optimize.cc
Go to the documentation of this file.
1
6/* #[ License : */
7/*
8 * Copyright (C) 1984-2022 J.A.M. Vermaseren
9 * When using this file you are requested to refer to the publication
10 * J.A.M.Vermaseren "New features of FORM" math-ph/0010025
11 * This is considered a matter of courtesy as the development was paid
12 * for by FOM the Dutch physics granting agency and we would like to
13 * be able to track its scientific use to convince FOM of its value
14 * for the community.
15 *
16 * This file is part of FORM.
17 *
18 * FORM is free software: you can redistribute it and/or modify it under the
19 * terms of the GNU General Public License as published by the Free Software
20 * Foundation, either version 3 of the License, or (at your option) any later
21 * version.
22 *
23 * FORM is distributed in the hope that it will be useful, but WITHOUT ANY
24 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
26 * details.
27 *
28 * You should have received a copy of the GNU General Public License along
29 * with FORM. If not, see <http://www.gnu.org/licenses/>.
30 */
31/*
32 #] License :
33 #[ includes :
34*/
35
36//#define DEBUG
37//#define DEBUG_MORE
38//#define DEBUG_MCTS
39//#define DEBUG_GREEDY
40
41#ifdef HAVE_CONFIG_H
42#ifndef CONFIG_H_INCLUDED
43#define CONFIG_H_INCLUDED
44#include <config.h>
45#endif
46#endif
47
48#include <vector>
49#include <stack>
50#include <algorithm>
51#include <set>
52#include <map>
53#include <climits>
54#include <cmath>
55#include <string>
56#include <algorithm>
57#include <iostream>
58
59#ifdef APPLE64
60#define HAVE_UNORDERED_MAP
61#define HAVE_UNORDERED_SET
62#endif
63
64#ifdef HAVE_UNORDERED_MAP
65 #include <unordered_map>
66 using std::unordered_map;
67#elif !defined(HAVE_TR1_UNORDERED_MAP) && defined(HAVE_BOOST_UNORDERED_MAP_HPP)
68 #include <boost/unordered_map.hpp>
69 using boost::unordered_map;
70#else
71 #include <tr1/unordered_map>
72 using std::tr1::unordered_map;
73#endif
74#ifdef HAVE_UNORDERED_SET
75 #include <unordered_set>
76 using std::unordered_set;
77#elif !defined(HAVE_TR1_UNORDERED_SET) && defined(HAVE_BOOST_UNORDERED_SET_HPP)
78 #include <boost/unordered_set.hpp>
79 using boost::unordered_set;
80#else
81 #include <tr1/unordered_set>
82 using std::tr1::unordered_set;
83#endif
84
85#if defined(HAVE_BUILTIN_POPCOUNT)
86 static inline int popcount(unsigned int x) {
87 return __builtin_popcount(x);
88 }
89#elif defined(HAVE_POPCNT)
90 #include <intrin.h>
91 static inline int popcount(unsigned int x) {
92 return __popcnt(x);
93 }
94#else
95 static inline int popcount(unsigned int x) {
96 int count = 0;
97 while (x > 0) {
98 if ((x & 1) == 1) count++;
99 x >>= 1;
100 }
101 return count;
102 }
103#endif
104
105extern "C" {
106#include "form3.h"
107}
108
109//#ifdef DEBUG
110#include "mytime.h"
111//#endif
112
113using namespace std;
114
115// operators
116const WORD OPER_ADD = -1;
117const WORD OPER_MUL = -2;
118const WORD OPER_COMMA = -3;
119
120// class for a node of the MCTS tree
122public:
123 vector<tree_node> childs;
124 double sum_results;
125 int num_visits;
126 WORD var;
127 bool finished;
128 PADPOINTER(1,1,1,1);
129
130 tree_node (int _var=0):
131 sum_results(0), num_visits(0), var(_var), finished(false) {}
132};
133
134// global variables for multithreading
135WORD *optimize_expr;
136vector<vector<WORD> > optimize_best_Horner_schemes;
137int optimize_num_vars;
138int optimize_best_num_oper;
139vector<WORD> optimize_best_instr;
140vector<WORD> optimize_best_vars;
141
142// global variables for MCTS
143bool mcts_factorized, mcts_separated;
144vector<WORD> mcts_vars;
145tree_node mcts_root;
146int mcts_expr_score;
147set<pair<int,vector<WORD> > > mcts_best_schemes;
148
149#ifdef WITHPTHREADS
150pthread_mutex_t optimize_lock;
151#endif
152
153/*
154 #] includes :
155 #[ print_instr :
156*/
157
158void print_instr (const vector<WORD> &instr, WORD num)
159{
160 const WORD *tbegin = &*instr.begin();
161 const WORD *tend = tbegin+instr.size();
162 for (const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
163 MesPrint("<%d> %a",num,t[2],t);
164 }
165}
166
167/*
168 #] print_instr :
169 #[ my_random_shuffle :
170*/
171
179template <class RandomAccessIterator>
180void my_random_shuffle (PHEAD RandomAccessIterator fr, RandomAccessIterator to) {
181 for (int i=to-fr-1; i>0; --i)
182 swap (fr[i],fr[wranf(BHEAD0) % (i+1)]);
183}
184
185/*
186 #] my_random_shuffle :
187 #[ get_expression :
188*/
189
190static WORD comlist[3] = {TYPETOPOLYNOMIAL,3,DOALL};
191
204LONG get_expression (int exprnr) {
205
206 GETIDENTITY;
207
208 AR.NoCompress = 1;
209
210 NewSort(BHEAD0);
211 EXPRESSIONS e = Expressions+exprnr;
212 SetScratch(AR.infile,&(e->onfile));
213
214 // get header term
215 WORD *term = AT.WorkPointer;
216 GetTerm(BHEAD term);
217
218 NewSort(BHEAD0);
219
220 // get terms
221 while (GetTerm(BHEAD term) > 0) {
222 AT.WorkPointer = term + *term;
223 WORD *t1 = term;
224 WORD *t2 = term + *term;
225 if (ConvertToPoly(BHEAD t1,t2,comlist,1) < 0) return -1;
226 int n = *t2;
227 NCOPY(t1,t2,n);
228 AT.WorkPointer = term + *term;
229 if (StoreTerm(BHEAD term)) return -1;
230 }
231
232 // sort and store in buffer
233 LONG len = EndSort(BHEAD (WORD *)((VOID *)(&optimize_expr)),2);
235 AT.WorkPointer = term;
236
237 return len;
238}
239
240/*
241 #] get_expression :
242 #[ PF_get_expression :
243*/
244#ifdef WITHMPI
245
246// get_expression for ParFORM
247LONG PF_get_expression (int exprnr) {
248 LONG len;
249 if (PF.me == MASTER) {
250 len = get_expression(exprnr);
251 }
252 if (PF.numtasks > 1) {
253 PF_BroadcastBuffer(&optimize_expr, &len);
254 }
255 return len;
256}
257
258// replace get_expression called in Optimize
259#define get_expression PF_get_expression
260
261#endif
262/*
263 #] PF_get_expression :
264 #[ get_brackets :
265*/
266
281vector<vector<WORD> > get_brackets () {
282
283 // check for brackets in expression
284 bool has_brackets = false;
285 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
286 WORD *tend=t+*t; tend-=ABS(*(tend-1));
287 for (WORD *t1=t+1; t1<tend; t1+=*(t1+1))
288 if (*t1 == HAAKJE)
289 has_brackets=true;
290 }
291
292 // replace brackets by SEPARATESYMBOL
293 vector<vector<WORD> > brackets;
294
295 if (has_brackets) {
296 int exprlen=10; // we need potential space for an empty bracket
297 for (WORD *t=optimize_expr; *t!=0; t+=*t)
298 exprlen += *t;
299 WORD *newexpr = (WORD *)Malloc1(exprlen*sizeof(WORD), "optimize newexpr");
300
301 int i=0;
302 int sep_power = 0;
303
304 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
305 WORD *t1 = t+1;
306
307 // scan for bracket
308 vector<WORD> bracket;
309 for (; *t1!=HAAKJE; t1+=*(t1+1))
310 bracket.insert(bracket.end(), t1, t1+*(t1+1));
311
312 if (brackets.size()==0 || bracket!=brackets.back()) {
313 sep_power++;
314 brackets.push_back(bracket);
315 }
316 t1+=*(t1+1);
317
318 WORD left = t + *t - t1;
319 bool more_symbols = (left != ABS(*(t+*t-1)));
320
321 // add power of SEPARATESYMBOL
322 newexpr[i++] = 1 + left + (more_symbols ? 2 : 4);
323 newexpr[i++] = SYMBOL;
324 newexpr[i++] = (more_symbols ? *(t1+1) + 2 : 4);
325 newexpr[i++] = SEPARATESYMBOL;
326 newexpr[i++] = sep_power;
327
328 // add remaining terms
329 if (more_symbols) {
330 t1+=2;
331 left-=2;
332 }
333 while (left-->0)
334 newexpr[i++] = *(t1++);
335 }
336/*
337 We insert here an empty bracket that is zero.
338 It is used for the case that there is only a single bracket which is
339 outside the notation for trees at a later stage.
340
341 There may be a problem now in that in the case of sep_power==1
342 newexpr is bigger than optimize_expr. We have to check that.
343*/
344 if ( sep_power == 1 )
345 {
346 WORD *t;
347 vector<WORD> bracket;
348 bracket.push_back(0);
349 bracket.push_back(0);
350 bracket.push_back(0);
351 bracket.push_back(0);
352 sep_power++;
353 brackets.push_back(bracket);
354 newexpr[i++] = 8;
355 newexpr[i++] = SYMBOL;
356 newexpr[i++] = 4;
357 newexpr[i++] = SEPARATESYMBOL;
358 newexpr[i++] = sep_power;
359 newexpr[i++] = 1;
360 newexpr[i++] = 1;
361 newexpr[i++] = 3;
362 newexpr[i++] = 0;
363 for (t=optimize_expr; *t!=0; t+=*t) {}
364 if ( t-optimize_expr+1 < i ) { // We have to redo this
365 M_free(optimize_expr,"$-sort space");
366 optimize_expr = (WORD *)Malloc1(i*sizeof(WORD),"$-sort space");
367 }
368 }
369 else {
370 newexpr[i++] = 0;
371 }
372 memcpy(optimize_expr, newexpr, i*sizeof(WORD));
373 M_free(newexpr,"optimize newexpr");
374
375 // if factorized, replace SEP by FAC and remove brackets
376 if (brackets[0].size()>0 && brackets[0][2]==FACTORSYMBOL) {
377 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
378 if (*t == ABS(*(t+*t-1))+1) continue;
379 if (t[1]==SYMBOL)
380 for (int i=3; i<t[2]; i+=2)
381 if (t[i]==SEPARATESYMBOL) t[i]=FACTORSYMBOL;
382 }
383 return vector<vector<WORD> >();
384 }
385 }
386
387 return brackets;
388}
389
390/*
391 #] get_brackets :
392 #[ count_operators :
393*/
394
401int count_operators (const WORD *expr, bool print=false) {
402
403 int n=0;
404 while (*(expr+n)!=0) n+=*(expr+n);
405
406 int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
407 WORD maxpowfac=1, maxpowsep=1;
408
409 for (const WORD *t=expr; *t!=0; t+=*t) {
410 if (t!=expr) cntadd++; // new term
411 if (*t==ABS(*(t+*t-1))+1) continue; // only coefficient
412
413 int cntsym=0;
414
415 if (t[1]==SYMBOL)
416 for (int i=3; i<t[2]; i+=2) {
417 if (t[i]==FACTORSYMBOL) {
418 maxpowfac = max(maxpowfac, t[i+1]);
419 continue;
420 }
421 if (t[i]==SEPARATESYMBOL) {
422 maxpowsep = max(maxpowsep, t[i+1]);
423 continue;
424 }
425 if (t[i+1]>2) { // (extra)symbol power>2
426 cntpow++;
427 sumpow += (int)floor(log(t[i+1])/log(2.0)) + popcount(t[i+1]) - 1;
428 }
429 if (t[i+1]==2) cntmul++; // (extra)symbol squared
430 cntsym++;
431 }
432
433 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntsym++; // non +/-1 coefficient
434
435 if (cntsym > 0) cntmul+=cntsym-1;
436 }
437
438 cntadd -= maxpowfac-1;
439 cntmul += maxpowfac-1;
440
441 cntadd -= maxpowsep-1;
442
443 if (print)
444 MesPrint ("*** STATS: original %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
445
446 return sumpow+cntmul+cntadd;
447}
448
455int count_operators (const vector<WORD> &instr, bool print=false) {
456
457 int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
458
459 const WORD *ebegin = &*instr.begin();
460 const WORD *eend = ebegin+instr.size();
461
462 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
463 for (const WORD *t=e+3; *t!=0; t+=*t) {
464 if (t!=e+3) {
465 if (*(e+1)==OPER_ADD) cntadd++; // new term
466 if (*(e+1)==OPER_MUL) cntmul++; // new term
467 }
468 if (*t == ABS(*(t+*t-1))+1) continue; // only coefficient
469 if (*(t+1)==SYMBOL || *(t+1)==EXTRASYMBOL) {
470 if (*(t+4)==2) cntmul++; // (extra)symbol squared
471 if (*(t+4)>2) { // (extra)symbol power>2
472 cntpow++;
473 sumpow += (int)floor(log(*(t+4))/log(2.0)) + popcount(*(t+4)) - 1;
474 }
475 }
476 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntmul++; // non +/-1 coefficient
477 }
478 }
479
480 if (print)
481 MesPrint ("*** STATS: optimized %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
482
483 return sumpow+cntmul+cntadd;
484}
485
486/*
487 #] count_operators :
488 #[ occurrence_order :
489*/
490
498vector<WORD> occurrence_order (const WORD *expr, bool rev) {
499
500 // count the number of occurrences of variables
501 map<WORD,int> cnt;
502 for (const WORD *t=expr; *t!=0; t+=*t) {
503 if (*t == ABS(*(t+*t-1))+1) continue; // skip constant term
504 if (t[1] == SYMBOL)
505 for (int i=3; i<t[2]; i+=2)
506 cnt[t[i]]++;
507 }
508
509 bool is_fac=false, is_sep=false;
510 if (cnt.count(FACTORSYMBOL)) {
511 is_fac=true;
512 cnt.erase(FACTORSYMBOL);
513 }
514 if (cnt.count(SEPARATESYMBOL)) {
515 is_sep=true;
516 cnt.erase(SEPARATESYMBOL);
517 }
518
519 // determine the order of the variables
520 vector<pair<int,WORD> > cnt_order;
521 for (map<WORD,int>::iterator i=cnt.begin(); i!=cnt.end(); i++)
522 cnt_order.push_back(make_pair(i->second, i->first));
523 sort(cnt_order.rbegin(), cnt_order.rend());
524
525 // create resulting order
526 vector<WORD> order;
527 for (int i=0; i<(int)cnt_order.size(); i++)
528 order.push_back(cnt_order[i].second);
529
530 if (rev) reverse(order.begin(),order.end());
531
532 // add FACTORSYMBOL/SEPARATESYMBOL
533 if (is_fac) order.insert(order.begin(), FACTORSYMBOL);
534 if (is_sep) order.insert(order.begin(), SEPARATESYMBOL);
535
536 return order;
537}
538
539/*
540 #] occurrence_order :
541 #[ Horner_tree :
542*/
543
579WORD getpower (const WORD *term, int var, int pos) {
580 if (*term == ABS(*(term+*term-1))+1) return 0; // constant term
581 if (2*pos+2 >= term[2]) return 0; // too few symbols
582 if (term[2*pos+3] != var) return 0; // incorrect symbol
583 return term[2*pos+4]; // return power
584}
585
593void fixarg (UWORD *t, WORD &n) {
594 int an=ABS(n), sn=SGN(n);
595 while (*(t+an-1)==0) an--;
596 n=an*sn;
597}
598
599void GcdLong_fix_args (PHEAD UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc) {
600 fixarg(a,na);
601 fixarg(b,nb);
602 GcdLong(BHEAD a,na,b,nb,c,nc);
603}
604
605void DivLong_fix_args(UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc, UWORD *d, WORD *nd) {
606 fixarg(a,na);
607 fixarg(b,nb);
608 DivLong(a,na,b,nb,c,nc,d,nd);
609}
610
631void build_Horner_tree (const WORD **terms, int numterms, int var, int maxvar, int pos, vector<WORD> *res) {
632
633 GETIDENTITY;
634
635 if (var == maxvar) {
636 // Horner tree is finished, so add remaining terms unfactorized
637 // (note: since only complete Horner schemes seem to be useful, numterms=1 here
638
639 for (int fr=0; fr<numterms; fr++) {
640
641 bool empty = true;
642 const WORD *t = terms[fr];
643
644 // add symbols
645 if (*t != ABS(*(t+*t-1))+1)
646 for (int i=3+2*pos; i<t[2]; i+=2) {
647 res->push_back(SYMBOL);
648 res->push_back(4);
649 res->push_back(t[i]);
650 res->push_back(t[i+1]);
651 if (!empty) res->push_back(OPER_MUL);
652 empty = false;
653 }
654
655 // if empty, add a 1, since the result should look like "... coeff *"
656 if (empty) {
657 res->push_back(SNUMBER);
658 res->push_back(5);
659 res->push_back(1);
660 res->push_back(1);
661 res->push_back(3);
662 }
663
664 // add coefficient
665 res->push_back(SNUMBER);
666 WORD n = ABS(*(t+*t-1));
667 res->push_back(n+2);
668 for (int i=0; i<n; i++)
669 res->push_back(*(t+*t-n+i));
670 res->push_back(OPER_MUL);
671
672 if (fr>0) res->push_back(OPER_ADD);
673 }
674
675 // result should end with gcd of the terms; right now this never
676 // triggers, but if one would allow for incomplete Horner schemes,
677 // one should extract the gcd here
678 if (numterms > 1) {
679 res->push_back(SNUMBER);
680 res->push_back(5);
681 res->push_back(1);
682 res->push_back(1);
683 res->push_back(3);
684 res->push_back(OPER_MUL);
685 }
686 }
687 else {
688 // extract variable "var" and the gcd and proceed recursively
689
690 WORD nnum = 0, nden = 0, ntmp = 0, ndum = 0;
691 UWORD *num = NumberMalloc("build_Horner_tree");
692 UWORD *den = NumberMalloc("build_Horner_tree");
693 UWORD *tmp = NumberMalloc("build_Horner_tree");
694 UWORD *dum = NumberMalloc("build_Horner_tree");
695
696 // previous coefficient for gcd extraction or coefficient multiplication
697 int prev_coeff_idx = -1;
698
699 for (int fr=0; fr<numterms;) {
700
701 // find power of current term
702 WORD pow = getpower(terms[fr], var, pos);
703
704 // find all terms with that power
705 int to=fr+1;
706 while (to<numterms && getpower(terms[to], var, pos) == pow) to++;
707
708 // recursively build Horner tree of all terms proportional to var^pow
709 build_Horner_tree (terms+fr, to-fr, var+1, maxvar, pow==0?pos:pos+1, res);
710
711 if (AN.poly_vars[var] != FACTORSYMBOL && AN.poly_vars[var] != SEPARATESYMBOL) {
712 // if normal symbol, find gcd(numerators) and gcd(denominators)
713 WORD n1 = res->at(res->size()-2) / 2;
714 WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
715
716 WORD *t2 = fr==0 ? t1 : &res->at(prev_coeff_idx);
717 WORD n2 = fr==0 ? n1 : *(t2+*(t2+1)-1) / 2;
718 if (fr>0) t2+=2;
719
720 GcdLong_fix_args(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,num,&nnum);
721 GcdLong_fix_args(BHEAD (UWORD *)t1+ABS(n1),ABS(n1),(UWORD *)t2+ABS(n2),ABS(n2),den,&nden);
722
723 // divide out gcds; note: leading zeroes can be added here
724 for (int i=0; i<2; i++) {
725 if (i==1 && fr==0) break;
726
727 WORD *t = i==0 ? t1 : t2;
728 WORD n = i==0 ? n1 : n2;
729
730 DivLong_fix_args((UWORD *)t, n, num, nnum, tmp, &ntmp, dum, &ndum);
731 for (int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
732 for (int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
733
734 if (SGN(ntmp) != SGN(n)) n=-n;
735
736 DivLong_fix_args((UWORD *)t, n, den, nden, tmp, &ntmp, dum, &ndum);
737 for (int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
738 for (int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
739
740 *t++ = SGN(n) * (2*ABS(n)+1);
741 }
742
743 // add the addition operator
744 if (fr>0) res->push_back(OPER_ADD);
745
746 // add the power of "var"
747 WORD nextpow = (to==numterms ? 0 : getpower(terms[to], var, pos));
748
749 if (pow-nextpow > 0) {
750 res->push_back(SYMBOL);
751 res->push_back(4);
752 res->push_back(var);
753 res->push_back(pow-nextpow);
754 res->push_back(OPER_MUL);
755 }
756
757 // add the extracted gcd
758 res->push_back(SNUMBER);
759 WORD n = MaX(ABS(nnum),nden);
760 res->push_back(n*2+3);
761 for (int i=0; i<ABS(nnum); i++) res->push_back(num[i]);
762 for (int i=0; i<n-ABS(nnum); i++) res->push_back(0);
763 for (int i=0; i<nden; i++) res->push_back(den[i]);
764 for (int i=0; i<n-ABS(nden); i++) res->push_back(0);
765 res->push_back(SGN(nnum)*(2*n+1));
766 res->push_back(OPER_MUL);
767
768 prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
769 }
770 else if (AN.poly_vars[var]==FACTORSYMBOL) {
771
772 // if factorsymbol, multiply overall integer contents
773
774 if (fr>0) {
775 WORD n1 = res->at(res->size()-2) / 2;
776 WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
777 WORD *t2 = &res->at(prev_coeff_idx);
778 WORD n2 = *(t2+*(t2+1)-1) / 2;
779 t2+=2;
780
781 MulRat(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,tmp,&ntmp);
782
783 // replace previous coefficient with 1
784 n2=ABS(n2);
785 for (int i=0; i<ABS(n2); i++)
786 t2[i] = t2[n2+i] = i==0 ? 1 : 0;
787 t2[2*n2] = 2*n2+1;
788
789 // remove this coefficient
790 for (int i=0; i<2*ABS(n1)+2; i++)
791 res->pop_back();
792
793 // add product
794 res->back() = 2*ABS(ntmp)+3; // adjust size of term
795 res->insert(res->end(), tmp, tmp+2*ABS(ntmp)); // num/den coefficient
796 res->push_back(SGN(ntmp)*(2*ABS(ntmp)+1)); // size of coefficient
797 res->push_back(OPER_MUL); // operator
798 }
799
800 prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
801
802 // multiply previous factors with this factor
803 if (fr>0)
804 res->push_back(OPER_MUL);
805 }
806 else { // AN.poly_vars[var]==SEPARATESYMBOL
807 if (fr>0)
808 res->push_back(OPER_COMMA);
809 prev_coeff_idx = -1;
810 }
811
812 fr=to;
813 }
814
815 // cleanup
816 NumberFree(dum,"build_Horner_tree");
817 NumberFree(tmp,"build_Horner_tree");
818 NumberFree(den,"build_Horner_tree");
819 NumberFree(num,"build_Horner_tree");
820 }
821}
822
831bool term_compare (const WORD *a, const WORD *b) {
832 if (*a == ABS(*(a+*a-1))+1) return true; // coefficient comes first
833 if (*b == ABS(*(b+*b-1))+1) return false;
834 if (a[1]!=SYMBOL) return true;
835 if (b[1]!=SYMBOL) return false;
836 for (int i=3; i<a[2] && i<b[2]; i+=2) {
837 if (a[i] !=b[i] ) return a[i] >b[i];
838 if (a[i+1]!=b[i+1]) return a[i+1]<b[i+1];
839 }
840 return a[2]<b[2];
841}
842
852vector<WORD> Horner_tree (const WORD *expr, const vector<WORD> &order) {
853#ifdef DEBUG
854 MesPrint ("*** [%s, w=%w] CALL: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
855#endif
856
857 GETIDENTITY;
858
859 // find the renumbering scheme (new numbers are 0,1,...,#vars-1)
860 map<WORD,WORD> renum;
861 AN.poly_num_vars = order.size();
862 AN.poly_vars = (WORD *)Malloc1(AN.poly_num_vars*sizeof(WORD), "AN.poly_vars");
863 for (int i=0; i<AN.poly_num_vars; i++) {
864 AN.poly_vars[i] = order[i];
865 renum[order[i]] = i;
866 }
867
868 // sort variables in individual terms using bubble sort
869 WORD *sorted = AT.WorkPointer;
870
871 for (const WORD *t=expr; *t!=0; t+=*t) {
872 memcpy(sorted, t, *t*sizeof(WORD));
873
874 if (*t != ABS(*(t+*t-1))+1) {
875 // non-constant term
876
877 // renumber variables, adding new variables if necessary
878 for (int i=3; i<sorted[2]; i+=2) {
879 if (!renum.count(sorted[i])) {
880 renum[sorted[i]] = AN.poly_num_vars;
881
882 WORD *new_poly_vars = (WORD *)Malloc1((AN.poly_num_vars+1)*sizeof(WORD), "AN.poly_vars");
883 memcpy(new_poly_vars, AN.poly_vars, AN.poly_num_vars*sizeof(WORD));
884 M_free(AN.poly_vars,"poly_vars");
885 AN.poly_vars = new_poly_vars;
886 AN.poly_vars[AN.poly_num_vars] = sorted[i];
887 AN.poly_num_vars++;
888 }
889 sorted[i] = renum[sorted[i]];
890 }
891 // order variables
892 for (int i=0; i<sorted[2]/2; i++)
893 for (int j=3; j+2<sorted[2]; j+=2)
894 if (sorted[j] > sorted[j+2]) {
895 swap(sorted[j] , sorted[j+2]);
896 swap(sorted[j+1], sorted[j+3]);
897 }
898 }
899
900 sorted += *sorted;
901 }
902
903 *sorted = 0;
904 sorted = AT.WorkPointer;
905
906 // find pointers to all terms and sort them efficiently
907 vector<const WORD *> terms;
908 for (const WORD *t=sorted; *t!=0; t+=*t)
909 terms.push_back(t);
910 sort(terms.rbegin(),terms.rend(),term_compare);
911
912 // construct the Horner tree
913 vector<WORD> res;
914 build_Horner_tree(&terms[0], terms.size(), 0, AN.poly_num_vars, 0, &res);
915
916 // remove leading zeroes in coefficients
917 int j=0;
918 for (int i=0; i<(int)res.size();) {
919 if (res[i]==OPER_ADD || res[i]==OPER_MUL || res[i]==OPER_COMMA)
920 res[j++] = res[i++];
921 else if (res[i]==SYMBOL) {
922 memmove(&res[j], &res[i], res[i+1]*sizeof(WORD));
923 i+=res[j+1];
924 j+=res[j+1];
925 }
926 else if (res[i]==SNUMBER) {
927 int n = (res[i+1]-2)/2;
928 int dn = 0;
929 while (res[i+1+n-dn]==0 && res[i+1+2*n-dn]==0) dn++;
930 res[j++] = SNUMBER;
931 res[j++] = 2*(n-dn) + 3;
932 memmove(&res[j], &res[i+2], (n-dn)*sizeof(WORD));
933 j+=n-dn;
934 memmove(&res[j], &res[i+n+2], (n-dn)*sizeof(WORD));
935 j+=n-dn;
936 res[j++] = SGN(res[i+2*n+2])*(2*(n-dn)+1);
937 i+=2*n+3;
938 }
939 }
940 res.resize(j);
941
942#ifdef DEBUG
943 MesPrint ("*** [%s, w=%w] DONE: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
944#endif
945
946 return res;
947}
948
949/*
950 #] Horner_tree :
951 #[ print_tree :
952*/
953
954// print Horner tree (for debugging)
955void print_tree (const vector<WORD> &tree) {
956
957 GETIDENTITY;
958
959 for (int i=0; i<(int)tree.size();) {
960 if (tree[i]==OPER_ADD) {
961 MesPrint("+%");
962 i++;
963 }
964 else if (tree[i]==OPER_MUL) {
965 MesPrint("*%");
966 i++;
967 }
968 else if (tree[i]==OPER_COMMA) {
969 MesPrint(",%");
970 i++;
971 }
972 else if (tree[i]==SNUMBER) {
973 UBYTE buf[100];
974 int n = tree[i+tree[i+1]-1]/2;
975 PrtLong((UWORD *)&tree[i+2], n, buf);
976 int l = strlen((char *)buf);
977 buf[l]='/';
978 n=ABS(n);
979 PrtLong((UWORD *)&tree[i+2+n], n, buf+l+1);
980 MesPrint("%s%",buf);
981 i+=tree[i+1];
982 }
983 else if (tree[i]==SYMBOL) {
984 if (AN.poly_vars[tree[i+2]] < 10000)
985 MesPrint("%s^%d%", VARNAME(symbols,AN.poly_vars[tree[i+2]]), tree[i+3]);
986 else
987 MesPrint("Z%d^%d%", MAXVARIABLES-AN.poly_vars[tree[i+2]], tree[i+3]);
988 i+=tree[i+1];
989 }
990 else {
991 MesPrint("error");
992 exit(1);
993 }
994
995 MesPrint(" %");
996 }
997
998 MesPrint("");
999}
1000
1001/*
1002 #] print_tree :
1003 #[ generate_instructions :
1004*/
1005
1006
1007struct CSEHash {
1008 size_t operator()(const vector<WORD>& n) const {
1009 return n[0];
1010 }
1011};
1012
1013struct CSEEq {
1014 bool operator()(const vector<WORD>& lhs, const vector<WORD>& rhs) const {
1015 if (lhs.size() != rhs.size()) return false;
1016 for (unsigned int i = 1; i < lhs.size(); i++) {
1017 if (lhs[i] != rhs[i]) return false;
1018 }
1019 return true;
1020 }
1021};
1022
1023
1024template<typename T> size_t hash_range(T* array, int size) {
1025 size_t hash = 0;
1026
1027 for (int i = 0; i < size; i++) {
1028 hash ^= array[i] + 0x9e3779b9 + (hash << 6) + (hash >> 2);
1029 }
1030
1031 return hash;
1032}
1033
1086vector<WORD> generate_instructions (const vector<WORD> &tree, bool do_CSE) {
1087#ifdef DEBUG
1088 MesPrint ("*** [%s, w=%w] CALL: generate_instructions(cse=%d)",
1089 thetime_str().c_str(), do_CSE?1:0);
1090#endif
1091
1092 typedef unordered_map<vector<WORD>, int, CSEHash, CSEEq> csemap;
1093 csemap ID;
1094
1095 // reserve lots of space, to prevent later rehashes
1096 // TODO: what if this is too large? make a parameter?
1097 if (do_CSE) {
1098 ID.rehash(mcts_expr_score * 2);
1099 }
1100
1101 // s is a stack of operands to process when you encounter operators
1102 // in the postfix expression tree. Operands consist of three WORDs,
1103 // formatted as the LHS/RHS of the keys in ID.
1104 stack<int> s;
1105 vector<WORD> instr;
1106 WORD numinstr = 0;
1107 vector<WORD> x;
1108
1109 // process the expression tree
1110 for (int i=0; i<(int)tree.size();) {
1111 x.clear();
1112
1113 if (tree[i]==SNUMBER) {
1114 WORD hash = hash_range(&tree[i], tree[i + 1]);
1115 x.push_back(hash);
1116 x.push_back(SNUMBER);
1117 x.insert(x.end(),&tree[i],&tree[i]+tree[i+1]);
1118 int sign = SGN(x.back());
1119 x.back() *= sign;
1120
1121 std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1122 s.push(0);
1123 s.push(sign * suc.first->second);
1124 s.push(SNUMBER);
1125 s.push(hash);
1126 i+=tree[i+1];
1127 }
1128 else if (tree[i]==SYMBOL) {
1129 WORD hash = hash_range(&tree[i], tree[i + 1]);
1130 if (tree[i+3]>1) {
1131 x.push_back(hash);
1132 x.push_back(SYMBOL);
1133 x.push_back(tree[i+2]+1); // variable (1-indexed)
1134 x.push_back(tree[i+3]); // power
1135
1136 csemap::iterator it = ID.find(x);
1137 if (do_CSE && it != ID.end()) {
1138 // already-seen power of a symbol
1139 s.push(1);
1140 s.push(it->second);
1141 s.push(EXTRASYMBOL);
1142 } else {
1143 //MesPrint("strange");
1144 if (numinstr == MAXPOSITIVE) {
1145 MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
1146 Terminate(-1);
1147 }
1148
1149 // new power greater than 1 of a symbol
1150 instr.push_back(numinstr); // expr.nr
1151 instr.push_back(OPER_MUL); // operator
1152 instr.push_back(9+OPTHEAD); // length total
1153 instr.push_back(8); // length operand
1154 instr.push_back(SYMBOL); // SYMBOL
1155 instr.push_back(4); // length symbol
1156 instr.push_back(tree[i+2]); // variable
1157 instr.push_back(tree[i+3]); // power
1158 instr.push_back(1); // numerator
1159 instr.push_back(1); // denominator
1160 instr.push_back(3); // length coeff
1161 instr.push_back(0); // trailing 0
1162
1163 ID[x] = ++numinstr;
1164 s.push(1);
1165 s.push(numinstr);
1166 s.push(EXTRASYMBOL);
1167 }
1168 }
1169 else {
1170 // power of 1
1171 s.push(tree[i+3]); // power
1172 s.push(tree[i+2]+1); // variable (1-indexed)
1173 s.push(SYMBOL);
1174 }
1175
1176 s.push(hash); // push hash
1177 i+=tree[i+1];
1178 }
1179 else { // tree[i]==OPERATOR
1180 int oper = tree[i];
1181 i++;
1182
1183 x.push_back(0); // placeholder for hash
1184 x.push_back(oper);
1185 vector<WORD> hash;
1186 hash.push_back(oper);
1187
1188 // get two operands from the stack
1189 for (int operand=0; operand<2; operand++) {
1190 hash.push_back(s.top()); s.pop();
1191 x.push_back(s.top()); s.pop();
1192 x.push_back(s.top()); s.pop();
1193 x.push_back(s.top()); s.pop();
1194 }
1195
1196 x[0] = hash_range(&hash[0], 3);
1197
1198 // get rid of multiplications by +/-1
1199 if (oper==OPER_MUL) {
1200 bool do_continue = false;
1201
1202 for (int operand=0; operand<2; operand++) {
1203 int idx_oper1 = operand==0 ? 2 : 5;
1204 int idx_oper2 = operand==0 ? 5 : 2;
1205
1206 // check whether operand 1 equals +/-1
1207 if (x[idx_oper1]==SNUMBER) {
1208
1209 int idx = ABS(x[idx_oper1+1])-1;
1210 if (tree[idx+2]==1 && tree[idx+3]==1 && ABS(tree[idx+4])==3) {
1211 // push +/- other operand and continue
1212 s.push(x[idx_oper2+2]);
1213 s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1214 s.push(x[idx_oper2]);
1215 s.push(hash[1 + (operand + 1) % 2]);
1216 do_continue = true;
1217 break;
1218 }
1219 }
1220 }
1221
1222 if (do_continue) continue;
1223 }
1224
1225 // check whether this subexpression has been seen before
1226 // if not, generate instruction to define it
1227 csemap::iterator it = ID.find(x);
1228 if (!do_CSE || it == ID.end()) {
1229 if (numinstr == MAXPOSITIVE) {
1230 MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
1231 Terminate(-1);
1232 }
1233
1234 instr.push_back(numinstr); // expr.nr.
1235 instr.push_back(x[1]); // operator
1236 instr.push_back(3); // length
1237
1238 ID[x] = ++numinstr;
1239
1240 int lenidx = instr.size()-1;
1241
1242 for (int j=0; j<2; j++)
1243 if (x[3*j+2]==SYMBOL || x[3*j+2]==EXTRASYMBOL) {
1244 instr.push_back(8); // length total
1245 instr.push_back(x[3*j+2]); // (extra)symbol
1246 instr.push_back(4); // length (extra)symbol
1247 instr.push_back(ABS(x[3*j+3])-1); // variable (0-indexed)
1248 instr.push_back(x[3*j+4]); // power
1249 instr.push_back(1); // numerator
1250 instr.push_back(1); // denominator
1251 instr.push_back(3*SGN(x[3*j+3])); // length coeff
1252 instr[lenidx] += 8;
1253 }
1254 else { // x[3*j+1]==SNUMBER
1255 int t = ABS(x[3*j+3])-1;
1256 instr.push_back(tree[t+1]-1); // length number
1257 instr.insert(instr.end(), &tree[t+2], &tree[t]+tree[t+1]); // digits
1258 instr.back() *= SGN(instr.back()) * SGN(x[3*j+3]);
1259 instr[lenidx] += tree[t+1]-1;
1260 }
1261
1262 instr.push_back(0); // trailing 0
1263 instr[lenidx]++;
1264 }
1265
1266 // push new expression on the stack
1267 s.push(1);
1268 s.push(ID[x]);
1269 s.push(EXTRASYMBOL);
1270 s.push(x[0]); // push hash
1271 }
1272 }
1273
1274#ifdef DEBUG
1275 MesPrint ("*** [%s, w=%w] DONE: generate_instructions(cse=%d) : numoper=%d",
1276 thetime_str().c_str(), do_CSE?1:0, count_operators(instr));
1277#endif
1278
1279 return instr;
1280}
1281
1282/*
1283 #] generate_instructions :
1284 #[ count_operators_cse :
1285*/
1286
1287
1295int count_operators_cse (const vector<WORD> &tree) {
1296 //MesPrint ("*** [%s] Starting CSEE", thetime_str().c_str());
1297
1298 typedef unordered_map<vector<WORD>, int, CSEHash, CSEEq> csemap;
1299 csemap ID;
1300
1301 // reserve lots of space, to prevent later rehashes
1302 // TODO: what if this is too large? make a parameter?
1303 ID.rehash(mcts_expr_score * 2);
1304
1305 // s is a stack of operands to process when you encounter operators
1306 // in the postfix expression tree. Operands consist of three WORDs,
1307 // formatted as the LHS/RHS of the keys in ID.
1308 stack<int> s;
1309 int numinstr = 0, numcommas = 0;
1310 vector<WORD> x;
1311
1312 // process the expression tree
1313 for (int i=0; i<(int)tree.size();) {
1314 x.clear();
1315
1316 if (tree[i]==SNUMBER) {
1317 WORD hash = hash_range(&tree[i], tree[i + 1]);
1318 x.push_back(hash);
1319 x.push_back(SNUMBER);
1320 x.insert(x.end(),&tree[i],&tree[i]+tree[i+1]);
1321 int sign = SGN(x.back());
1322 x.back() *= sign;
1323
1324 std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1325 s.push(0);
1326 s.push(sign * suc.first->second);
1327 s.push(SNUMBER);
1328 s.push(hash);
1329 i+=tree[i+1];
1330 }
1331 else if (tree[i]==SYMBOL) {
1332 WORD hash = hash_range(&tree[i], tree[i + 1]);
1333 if (tree[i+3]>1) {
1334 x.push_back(hash);
1335 x.push_back(SYMBOL);
1336 x.push_back(tree[i+2]+1); // variable (1-indexed)
1337 x.push_back(tree[i+3]); // power
1338
1339 csemap::iterator it = ID.find(x);
1340 if (it != ID.end()) {
1341 // already-seen power of a symbol
1342 s.push(1);
1343 s.push(it->second);
1344 s.push(EXTRASYMBOL);
1345 } else {
1346 if (tree[i + 3] == 2)
1347 numinstr++;
1348 else
1349 numinstr += (int)floor(log(tree[i+3])/log(2.0)) + popcount(tree[i+3]) - 1;
1350
1351 ID[x] = numinstr;
1352 s.push(1);
1353 s.push(numinstr);
1354 s.push(EXTRASYMBOL);
1355 }
1356 }
1357 else {
1358 // power of 1
1359 s.push(tree[i+3]); // power
1360 s.push(tree[i+2]+1); // variable (1-indexed)
1361 s.push(SYMBOL);
1362 }
1363
1364 s.push(hash); // push hash
1365
1366 i+=tree[i+1];
1367 }
1368 else { // tree[i]==OPERATOR
1369 int oper = tree[i];
1370 i++;
1371
1372 x.push_back(0); // placeholder for hash
1373 x.push_back(oper);
1374 vector<WORD> hash;
1375 hash.push_back(oper);
1376
1377 // get two operands from the stack
1378 for (int operand=0; operand<2; operand++) {
1379 hash.push_back(s.top()); s.pop();
1380 x.push_back(s.top()); s.pop();
1381 x.push_back(s.top()); s.pop();
1382 x.push_back(s.top()); s.pop();
1383 }
1384
1385
1386 x[0] = hash_range(&hash[0], 3);
1387
1388 // get rid of multiplications by +/-1
1389 if (oper==OPER_MUL) {
1390 bool do_continue = false;
1391
1392 for (int operand=0; operand<2; operand++) {
1393 int idx_oper1 = operand==0 ? 2 : 5;
1394 int idx_oper2 = operand==0 ? 5 : 2;
1395
1396 // check whether operand 1 equals +/-1
1397 if (x[idx_oper1]==SNUMBER) {
1398
1399 int idx = ABS(x[idx_oper1+1])-1;
1400 if (tree[idx+2]==1 && tree[idx+3]==1 && ABS(tree[idx+4])==3) {
1401 // push +/- other operand and continue
1402 s.push(x[idx_oper2+2]);
1403 s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1404 s.push(x[idx_oper2]);
1405 s.push(hash[1 + (operand + 1) % 2]);
1406 do_continue = true;
1407 break;
1408 }
1409 }
1410 }
1411
1412 if (do_continue) continue;
1413 }
1414
1415 // check whether this subexpression has been seen before
1416 // if not, generate instruction to define it
1417 csemap::iterator it = ID.find(x);
1418 if (it == ID.end()) {
1419 if (numinstr == MAXPOSITIVE) {
1420 MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
1421 Terminate(-1);
1422 }
1423
1424 if (oper == OPER_COMMA) numcommas++;
1425 ID[x] = ++numinstr;
1426 s.push(1);
1427 s.push(numinstr);
1428 s.push(EXTRASYMBOL);
1429 } else {
1430 // push new expression on the stack
1431 s.push(1);
1432 s.push(it->second);
1433 s.push(EXTRASYMBOL);
1434 }
1435
1436 s.push(x[0]); // push hash
1437 }
1438 }
1439
1440 //MesPrint ("*** [%s] Stopping CSEE", thetime_str().c_str());
1441 return numinstr - numcommas;
1442}
1443
1444/*
1445 #] count_operators_cse :
1446 #[ count_operators_cse_topdown :
1447*/
1448
1449typedef struct node {
1450 const WORD* data;
1451 struct node* l;
1452 struct node* r; // TODO: add l,r to data?
1453 WORD sign; // TODO: use data for this?
1454 UWORD hash;
1455
1456 node() : l(NULL), r(NULL), sign(1), hash(0) {};
1457 node(const WORD* data) : data(data), l(NULL), r(NULL), sign(1), hash(0) {};
1458
1459 // a minus sign in the tree should only count as a different entry if
1460 // it is a compound expression: a = -a, but T+-V != T+V
1461 int cmp(const struct node* rhs) const {
1462 if (this == rhs) return 0;
1463
1464 if (data[0] != rhs->data[0]) return data[0] < rhs->data[0] ? -1 : 1;
1465 int mod = data[0] == SNUMBER ? -1 : 0; // don't check sign, for numbers
1466 if (data[0] == SYMBOL || data[0] == SNUMBER) {
1467 for (int i = 0; i < data[1] + mod; i++) {
1468 if (data[i] != rhs->data[i]) return data[i] < rhs->data[i] ? -1 : 1;
1469 }
1470 } else {
1471 int lv = l->cmp(rhs->l);
1472 if (lv != 0) return lv;
1473 int rv = r->cmp(rhs->r);
1474 if (rv != 0) return rv;
1475
1476 // TODO: only for ADD operation
1477 if (l->sign != rhs->l->sign) return l->sign < rhs->l->sign ? -1 : 1;
1478 if (r->sign != rhs->r->sign) return r->sign < rhs->r->sign ? -1 : 1;
1479 }
1480
1481 return 0;
1482 }
1483
1484 // less than operator
1485 bool operator() (const struct node* lhs, const struct node* rhs) const
1486 {
1487 return lhs->cmp(rhs) < 0;
1488 }
1489
1490 void calcHash() {
1491 int mod = data[0] == SNUMBER ? -1 : 0; // don't check sign, for numbers
1492 if (data[0] == SYMBOL || data[0] == SNUMBER) {
1493 hash = hash_range(data, data[1] + mod);
1494 } else {
1495 if (l->hash == 0) l->calcHash();
1496 if (r->hash == 0) r->calcHash();
1497
1498 // signs only matter for compound expressions
1499 size_t newr[] = {(size_t)data[0], l->hash, (size_t)l->sign, r->hash, (size_t)r->sign};
1500 hash = hash_range(newr, 5);
1501 }
1502 }
1503} NODE;
1504
1505struct NodeHash {
1506 size_t operator()(const NODE* n) const {
1507 return n->hash; // already computed
1508 }
1509};
1510
1511struct NodeEq {
1512 bool operator()(const NODE* lhs, const NODE* rhs) const {
1513 return lhs->cmp(rhs) == 0;
1514 }
1515};
1516
1517NODE* buildTree(vector<WORD> &tree) {
1518 //MesPrint ("*** [%s] Starting CSEE topdown", thetime_str().c_str());
1519
1520 // allocate spaces for the tree, cannot be more nodes than tree size
1521 NODE* ar = (NODE*)Malloc1(tree.size() * sizeof(NODE), "CSE tree");
1522 NODE* c = 0;
1523 unsigned int curIndex = 0;
1524
1525 stack<NODE*> st;
1526 for (int i=0; i<(int)tree.size();) {
1527 c = ar + curIndex;
1528 new (c) NODE(&tree[i]); // placement new
1529 curIndex++;
1530
1531 if (tree[i]==SYMBOL || tree[i] == SNUMBER) {
1532 // extract the sign to a new class member
1533 if (tree[i] == SNUMBER) {
1534 c->sign = SGN(tree[i + tree[i + 1] -1]);
1535 }
1536
1537 c->calcHash();
1538 st.push(c);
1539 i+=tree[i+1];
1540 } else {
1541 c->r = st.top(); st.pop();
1542 c->l = st.top(); st.pop();
1543
1544 // filter *1 and *-1
1545 // TODO: also multiply if there are two numbers?
1546 if (c->data[0] == OPER_MUL) {
1547 NODE* ch[] = {c->r, c->l};
1548 for (int j = 0; j < 2; j++)
1549 if (ch[j]->data[0] == SNUMBER && ch[j]->data[1] == 5 && ch[j]->data[2]==1 && ch[j]->data[3]==1) {
1550 ch[(j+1)%2]->sign *= ch[j]->sign; // transfer sign
1551 c = ch[(j+1)%2];
1552 break;
1553 }
1554 }
1555
1556 c->calcHash();
1557 st.push(c);
1558 i++;
1559 }
1560 }
1561
1562 // TODO: reallocate to smaller size? Could save memory
1563 //MesPrint("Memory difference: %d vs %d", curIndex, tree.size());
1564
1565 // we want to make the root of the tree the first element
1566 // so that we can easily free the array.
1567 // we swap the first element with the root
1568 // we need to change the pointer in the operator node that has this element as a child
1569 // TODO: check performance
1570 for (unsigned int i = 0; i < curIndex; i++) {
1571 if (ar[i].l == ar) ar[i].l = st.top();
1572 if (ar[i].r == ar) ar[i].r = st.top();
1573 }
1574
1575 swap(ar[0], *st.top());
1576 return ar;
1577}
1578
1579int count_operators_cse_topdown (vector<WORD> &tree) {
1580 typedef unordered_set<NODE*, NodeHash, NodeEq> nodeset;
1581 nodeset ID;
1582
1583 // reserve lots of space, to prevent later rehashes
1584 // TODO: what if this is too large? make a parameter?
1585 ID.rehash(mcts_expr_score * 2);
1586
1587 int numinstr = 0;
1588
1589 NODE* root = buildTree(tree);
1590
1591 stack<NODE*> stack;
1592 stack.push(root);
1593 while (!stack.empty())
1594 {
1595 NODE* c = stack.top();
1596 stack.pop();
1597
1598 if (c->data[0] == SYMBOL) {
1599 if (c->data[3] > 1) {
1600 std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1601 if (suc.second) { // new
1602 if (c->data[3] == 2)
1603 numinstr++;
1604 else
1605 numinstr += (int)floor(log(c->data[3])/log(2.0)) + popcount(c->data[3]) - 1;
1606 }
1607 }
1608 } else {
1609 if (c->data[0] != SNUMBER) {
1610 // operator
1611 std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1612 if (suc.second) {
1613 stack.push(c->r);
1614 stack.push(c->l);
1615
1616 // ignore OPER_COMMA
1617 if (c->data[0] == OPER_MUL || c->data[0] == OPER_ADD)
1618 numinstr++;
1619 }
1620 }
1621 }
1622 }
1623
1624 //MesPrint ("*** [%s] Stopping CSEE", thetime_str().c_str());
1625 M_free(root, "CSE tree");
1626
1627 return numinstr;
1628}
1629
1630/*
1631 #] count_operators_cse_topdown :
1632 #[ simulated_annealing :
1633*/
1634vector<WORD> simulated_annealing() {
1635 float minT = AO.Optimize.saMinT.fval;
1636 float maxT = AO.Optimize.saMaxT.fval;
1637 float T = maxT;
1638 float coolrate = pow(minT / maxT, 1 / (float)AO.Optimize.saIter);
1639
1640 GETIDENTITY;
1641
1642 // create a valid state where FACTORSYMBOL/SEPARATESYMBOL remains first
1643 vector<WORD> state = occurrence_order(optimize_expr, false);
1644 int startindex = 0;
1645 if ((state.size() > 0 && state[0] == SEPARATESYMBOL) || (state.size() > 1 && state[1] == FACTORSYMBOL)) startindex++;
1646 if (state.size() > 1 && state[1] == FACTORSYMBOL) startindex++;
1647
1648 my_random_shuffle(BHEAD state.begin() + startindex, state.end()); // start from random scheme
1649
1650 vector<WORD> tree = Horner_tree(optimize_expr, state);
1651 int curscore = count_operators_cse_topdown(tree);
1652
1653 // clean poly_vars, that are allocated by Horner_tree
1654 AN.poly_num_vars = 0;
1655 M_free(AN.poly_vars,"poly_vars");
1656
1657 std::vector<WORD> best = state; // best state
1658 int bestscore = curscore;
1659
1660 if (startindex == (int)state.size() || (int)state.size() - startindex < 2) {
1661 return best;
1662 }
1663
1664 for (int o = 0; o < AO.Optimize.saIter; o++) {
1665 int inda = iranf(BHEAD state.size() - startindex) + startindex;
1666 int indb = iranf(BHEAD state.size() - startindex) + startindex;
1667
1668 if (inda == indb) {
1669 continue;
1670 }
1671
1672 swap(state[inda], state[indb]); // swap works best for Horner
1673
1674 vector<WORD> tree = Horner_tree(optimize_expr, state);
1675 int newscore = count_operators_cse_topdown(tree);
1676
1677 // clean poly_vars, that are allocated by Horner_tree
1678 AN.poly_num_vars = 0;
1679 M_free(AN.poly_vars,"poly_vars");
1680
1681 if (newscore <= curscore || 2.0 * wranf(BHEAD0) / (float)(UWORD)(-1) < exp((curscore - newscore) / T)) {
1682 curscore = newscore;
1683
1684 if (curscore < bestscore) {
1685 bestscore = curscore;
1686 best = state;
1687 }
1688 } else {
1689 swap(state[inda], state[indb]);
1690 }
1691
1692#ifdef DEBUG_SA
1693 MesPrint("Score at step %d: %d", o, curscore);
1694#endif
1695 T *= coolrate;
1696 }
1697
1698#ifdef DEBUG_SA
1699 MesPrint("Simulated annealing score: %d", bestscore);
1700#endif
1701
1702 return best;
1703}
1704
1705/*
1706 #] simulated_annealing :
1707 #[ printpstree :
1708*/
1709
1710/*
1711// print MCTS tree with LaTeX/pstricks (for analysis)
1712void printpstree_rec (tree_node x, string pre="") {
1713
1714 if (x.num_visits==1) {
1715 MesPrint("%s\\TR{%d}",pre.c_str(),x.var);
1716 }
1717 else {
1718 MesPrint("%s\\pstree%s{\\TR{%d}}{",pre.c_str(),
1719 pre==" "?"[nodesep=0, levelsep=40]":"",
1720 x.var);
1721 for (int i=0; i<(int)x.childs.size(); i++)
1722 if (x.childs[i].num_visits>0)
1723 printpstree_rec(x.childs[i], pre+" ");
1724 MesPrint("%s}",pre.c_str());
1725 }
1726}
1727
1728void printpstree () {
1729 // draw tree with pstricks
1730 MesPrint ("\\documentclass{article}");
1731 MesPrint ("\\usepackage{pstricks,pst-node,pst-tree,graphicx}");
1732 MesPrint ("\\begin{document}");
1733 MesPrint ("\\scalebox{0.02}{");
1734 printpstree_rec(mcts_root," ");
1735 MesPrint ("}");
1736 MesPrint ("\\end{document}");
1737}
1738*/
1739
1740/*
1741 #] printpstree :
1742 #[ find_Horner_MCTS_expand_tree :
1743*/
1744
1776/*
1777 #[ next_MCTS_scheme :
1778*/
1779
1780// find a Horner scheme to be used for the next simulation
1781inline static void next_MCTS_scheme (PHEAD vector<WORD> *porder, vector<WORD> *pscheme, vector<tree_node *> *ppath) {
1782
1783 vector<WORD> &order = *porder;
1784 vector<WORD> &schemev = *pscheme;
1785 vector<tree_node *> &path = *ppath;
1786 int depth = 0, nchild0;
1787 float slide_down_factor = 1.0;
1788
1789 order.clear();
1790 path.clear();
1791
1792 // MCTS step I: select
1793 tree_node *select = &mcts_root;
1794 path.push_back(select);
1795 nchild0 = select->childs.size();
1796 while (select->childs.size() > 0) {
1797 // add virtual loss
1798 select->num_visits++;
1799 select->sum_results+=mcts_expr_score;
1800
1801//-------------------------------------------------------------------
1802 switch ( AO.Optimize.mctsdecaymode ) {
1803 case 1: // Based on http://arxiv.org/abs/arXiv:1312.0841
1804 slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1805 break;
1806 case 2: // This gives a bit more cleanup time at the end.
1807 if ( 2*AT.optimtimes < AO.Optimize.mctsnumexpand ) {
1808 slide_down_factor = 1.0*(AO.Optimize.mctsnumexpand-2*AT.optimtimes);
1809 slide_down_factor /= 1.0*AO.Optimize.mctsnumexpand;
1810 }
1811 else {
1812 slide_down_factor = 0.0001;
1813 }
1814 break;
1815 case 3: // depth dependent factor combined with case 1
1816 float dd = 1.0-(1.0*depth)/(1.0*nchild0);
1817 slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1818 if ( dd <= 0.000001 ) slide_down_factor = 1.0;
1819 else slide_down_factor /= dd;
1820 if ( slide_down_factor > 1.0 ) slide_down_factor = 1.0;
1821 break;
1822 }
1823//-------------------------------------------------------------------
1824
1825#ifdef DEBUG_MCTS
1826 MesPrint("select %d",select->var);
1827#endif
1828
1829 // find most-promising node
1830 double best=0;
1831 tree_node *next=NULL;
1832 for (vector<tree_node>::iterator p=select->childs.begin(); p<select->childs.end(); p++) {
1833 double score;
1834 if (p->num_visits >= 1) {
1835
1836 // there are results calculated, so select with the UCT formula
1837 score = mcts_expr_score / (p->sum_results/p->num_visits) +
1838//-------------------------------------------------------------------------
1839 slide_down_factor *
1840//-------------------------------------------------------------------------
1841 2 * AO.Optimize.mctsconstant.fval * sqrt(2*log(select->num_visits) / p->num_visits);
1842
1843#ifdef DEBUG_MCTS
1844 printf("%d: %.2lf [x=%.2lf n=%d fin=%i]\n",p->var,score,mcts_expr_score / (p->sum_results/p->num_visits),
1845 p->num_visits,p->finished?1:0);
1846 fflush(stdout);
1847#endif
1848 }
1849 else {
1850 // no results yet, so select this node by setting score=infinite
1851 score = 1e100;
1852
1853#ifdef DEBUG_MCTS
1854 printf("%d: inf\n",p->var); fflush(stdout);
1855#endif
1856 }
1857
1858 // update best candidate
1859 if (!p->finished && score>best) {
1860 best=score;
1861 next=&*p;
1862 }
1863 }
1864
1865 // if no node is found, this node is finished
1866 if (next==NULL) {
1867 select->finished=true;
1868 break;
1869 }
1870
1871 // traverse down the tree
1872 select = next;
1873 path.push_back(select);
1874 order.push_back(select->var);
1875 depth++;
1876 }
1877
1878 // MCTS step II: expand
1879
1880#ifdef DEBUG_MCTS
1881 MesPrint("expand %d",select->var);
1882#endif
1883
1884 // variables used so far
1885 set<WORD> var_used;
1886
1887 for (int i=0; i<(int)order.size(); i++)
1888 var_used.insert(ABS(order[i])-1);
1889
1890 // if this a new node, create node and add children
1891 if (!select->finished && select->childs.size()==0) {
1892 tree_node new_node(select->var);
1893 int sign = (order.size() == 0) ? 1 : SGN(order.back());
1894 for (int i=0; i<(int)mcts_vars.size(); i++)
1895 if (!var_used.count(mcts_vars[i])) {
1896 new_node.childs.push_back(tree_node(sign*(mcts_vars[i]+1)));
1897 if (AO.Optimize.hornerdirection==O_FORWARDANDBACKWARD)
1898 new_node.childs.push_back(tree_node(-sign*(mcts_vars[i]+1)));
1899 }
1900 my_random_shuffle(BHEAD new_node.childs.begin(), new_node.childs.end());
1901
1902 // here locking is necessary, since operator=(tree_node) is a
1903 // non-atomic operation (using pointers makes this lock obsolete)
1904 LOCK(optimize_lock);
1905 *select = new_node;
1906 UNLOCK(optimize_lock);
1907 }
1908 // set finished if necessary
1909 if (select->childs.size()==0)
1910 select->finished = true;
1911
1912 // add virtual loss of number of operators in original expression
1913 select->num_visits++;
1914 select->sum_results+=mcts_expr_score;
1915
1916 // MCTS step III: simulation
1917
1918 // create complete Horner scheme
1919 deque<WORD> scheme;
1920
1921 for (int i=0; i<(int)mcts_vars.size(); i++)
1922 if (!var_used.count(mcts_vars[i]))
1923 scheme.push_back(mcts_vars[i]);
1924 my_random_shuffle(BHEAD scheme.begin(), scheme.end());
1925
1926 for (int i=(int)order.size()-1; i>=0; i--) {
1927 if (order[i] > 0)
1928 scheme.push_front(order[i]-1);
1929 else
1930 scheme.push_back(-order[i]-1);
1931 }
1932
1933 // add FACTORSYMBOL/SEPARATESYMBOL is necessary
1934 if (mcts_factorized)
1935 scheme.push_front(FACTORSYMBOL);
1936 if (mcts_separated)
1937 scheme.push_front(SEPARATESYMBOL);
1938
1939 // Horner scheme as a vector
1940 schemev = vector<WORD>(scheme.begin(), scheme.end());
1941
1942}
1943
1944/*
1945 #] next_MCTS_scheme :
1946 #[ try_MCTS_scheme :
1947*/
1948
1949// count the number of operators in the given Horner scheme
1950inline static void try_MCTS_scheme (PHEAD const vector<WORD> &scheme, int *pnum_oper) {
1951
1952 // do Horner, CSE and count the number of operators
1953 vector<WORD> tree = Horner_tree(optimize_expr, scheme);
1954 //vector<WORD> instr = generate_instructions(tree, true);
1955 //int num_oper = count_operators(instr);
1956 //int num_oper2 = count_operators_cse(tree);
1957 //int num_oper2 = count_operators_cse_topdown(tree);
1958 //MesPrint("%d %d", num_oper, num_oper2);
1959
1960 int num_oper = count_operators_cse_topdown(tree);
1961
1962 // clean poly_vars, that is allocated by Horner_tree
1963 AN.poly_num_vars = 0;
1964 M_free(AN.poly_vars,"poly_vars");
1965
1966 *pnum_oper = num_oper;
1967
1968}
1969
1970/*
1971 #] try_MCTS_scheme :
1972 #[ update_MCTS_scheme :
1973*/
1974
1975// update the best score and the search tree
1976inline static void update_MCTS_scheme (int num_oper, const vector<WORD> &scheme, vector<tree_node *> *ppath) {
1977
1978 vector<tree_node *> &path = *ppath;
1979
1980 // update the (global) list of best Horner scheme
1981 if ((int)mcts_best_schemes.size() < AO.Optimize.mctsnumkeep ||
1982 (--mcts_best_schemes.end())->first > num_oper) {
1983 // here locking is necessary, for otherwise best_schemes may
1984 // become corrupted; lock can be prevented if each thread keeps
1985 // track of it's own list and those lists are merged in the end,
1986 // but this seems not useful to implement
1987 LOCK(optimize_lock);
1988 mcts_best_schemes.insert(make_pair(num_oper,scheme));
1989 if ((int)mcts_best_schemes.size() > AO.Optimize.mctsnumkeep)
1990 mcts_best_schemes.erase(--mcts_best_schemes.end());
1991 UNLOCK(optimize_lock);
1992 }
1993
1994 // MCTS step IV: backpropagate
1995
1996 // add number of operator and subtract mcts_expr_score, which
1997 // behaves as a "virtual loss"
1998 for (vector<tree_node *>::iterator p=path.begin(); p<path.end(); p++)
1999 (*p)->sum_results += num_oper - mcts_expr_score;
2000
2001}
2002
2003/*
2004 #] update_MCTS_scheme :
2005*/
2006
2007void find_Horner_MCTS_expand_tree () {
2008
2009#ifdef DEBUG
2010 MesPrint ("*** [%s, w=%w] CALL: find_Horner_MCTS_expand_tree", thetime_str().c_str());
2011#endif
2012
2013 GETIDENTITY;
2014
2015 // the order for the Horner scheme up to the selected node, with signs
2016 // indicating forward or backward.
2017 vector<WORD> order;
2018
2019 // complete Horner scheme
2020 vector<WORD> scheme;
2021
2022 // path to the selected node
2023 vector<tree_node *> path;
2024
2025 // the number of operations obtained by the simulation
2026 int num_oper;
2027
2028 next_MCTS_scheme(BHEAD &order, &scheme, &path);
2029 try_MCTS_scheme(BHEAD scheme, &num_oper);
2030#ifdef DEBUG_MCTS
2031 // Actually "order" is needed only for this debug output.
2032 MesPrint ("{%a} -> {%a} -> %d", order.size(), &order[0], scheme.size(), &scheme[0], num_oper);
2033#endif
2034 update_MCTS_scheme(num_oper, scheme, &path);
2035
2036#ifdef DEBUG
2037 MesPrint ("*** [%s, w=%w] DONE: find_Horner_MCTS_expand_tree(%a-> %d)",
2038 thetime_str().c_str(), scheme.size(), &scheme[0], num_oper);
2039#endif
2040
2041}
2042
2043/*
2044 #] find_Horner_MCTS_expand_tree :
2045 #[ PF_find_Horner_MCTS_expand_tree :
2046*/
2047#ifdef WITHMPI
2048
2049// To remember which task is assigned to each slave. A task is represented by
2050// a pair of a Horner scheme and the selected path for the scheme.
2051// The index range is from 1 to PF.numtasks-1.
2052vector<pair<vector<WORD>, vector<tree_node *> > > PF_opt_MCTS_tasks;
2053
2054// Initialization.
2055void PF_find_Horner_MCTS_expand_tree_master_init () {
2056
2057 PF_opt_MCTS_tasks.resize(PF.numtasks);
2058 for (int i = 1; i < PF.numtasks; i++) {
2059 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[i];
2060 p.first.clear();
2061 p.second.clear();
2062 }
2063
2064}
2065
2066// Wait for an idle slave and return the process number.
2067int PF_find_Horner_MCTS_expand_tree_master_next () {
2068
2069 // Find an idle slave.
2070 int next;
2071 PF_Receive(PF_ANY_SOURCE, PF_OPT_MCTS_MSGTAG, &next, NULL);
2072
2073 // Check if the slave had a task.
2074 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2075 if (!p.first.empty()) {
2076 // If so, update the result.
2077 int num_oper;
2078 PF_Unpack(&num_oper, 1, PF_INT);
2079 update_MCTS_scheme(num_oper, p.first, &p.second);
2080
2081 // Clear the task.
2082 p.first.clear();
2083 p.second.clear();
2084 }
2085
2086 return next;
2087
2088}
2089
2090// The main function on the master.
2091void PF_find_Horner_MCTS_expand_tree_master () {
2092
2093#ifdef DEBUG
2094 MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2095#endif
2096
2097 vector<WORD> order;
2098 vector<WORD> scheme;
2099 vector<tree_node *> path;
2100
2101 next_MCTS_scheme(BHEAD &order, &scheme, &path);
2102
2103 // Find an idle slave.
2104 int next = PF_find_Horner_MCTS_expand_tree_master_next();
2105
2106 // Send a new task to the slave.
2108 int len = scheme.size();
2109 PF_LongSinglePack(&len, 1, PF_INT);
2110 PF_LongSinglePack(&scheme[0], len, PF_WORD);
2111 PF_LongSingleSend(next, PF_OPT_MCTS_MSGTAG);
2112
2113 // Remember the task.
2114 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2115 p.first = scheme;
2116 p.second = path;
2117
2118#ifdef DEBUG
2119 MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2120#endif
2121
2122}
2123
2124// Wait for all the slaves to finish their tasks.
2125void PF_find_Horner_MCTS_expand_tree_master_wait () {
2126
2127#ifdef DEBUG
2128 MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2129#endif
2130
2131 // Wait for all the slaves.
2132 for (int i = 1; i < PF.numtasks; i++) {
2133 int next = PF_find_Horner_MCTS_expand_tree_master_next();
2134 // Send a null task.
2136 int len = 0;
2137 PF_LongSinglePack(&len, 1, PF_INT);
2138 PF_LongSingleSend(next, PF_OPT_MCTS_MSGTAG);
2139 }
2140
2141#ifdef DEBUG
2142 MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2143#endif
2144
2145}
2146
2147// The main function on the slaves.
2148void PF_find_Horner_MCTS_expand_tree_slave () {
2149
2150#ifdef DEBUG
2151 MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2152#endif
2153
2154 vector<WORD> scheme;
2155
2156 {
2157 // Send the first message to the master, which indicates I am idle.
2159 int dummy = 0;
2160 PF_Pack(&dummy, 1, PF_INT);
2161 PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2162 }
2163
2164 for (;;) {
2165 // Get a task from the master.
2166 PF_LongSingleReceive(MASTER, PF_OPT_MCTS_MSGTAG, NULL, NULL);
2167
2168 // Length of the task.
2169 int len;
2170 PF_LongSingleUnpack(&len, 1, PF_INT);
2171
2172 // No task remains.
2173 if (len == 0) break;
2174
2175 // Perform the given task.
2176 scheme.resize(len);
2177 PF_LongSingleUnpack(&scheme[0], len, PF_WORD);
2178 int num_oper;
2179 try_MCTS_scheme(scheme, &num_oper);
2180
2181 // Send the result to the master.
2183 PF_Pack(&num_oper, 1, PF_INT);
2184 PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2185 }
2186
2187#ifdef DEBUG
2188 MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2189#endif
2190
2191}
2192
2193#endif
2194/*
2195 #] PF_find_Horner_MCTS_expand_tree :
2196 #[ find_Horner_MCTS :
2197*/
2198
2207//vector<vector<WORD> > find_Horner_MCTS () {
2209
2210#ifdef WITHMPI
2211 if (PF.me != MASTER) {
2212 if (PF.numtasks <= 1) return;
2213 PF_find_Horner_MCTS_expand_tree_slave();
2214 return;
2215 }
2216#endif
2217
2218#ifdef DEBUG
2219 MesPrint ("*** [%s, w=%w] CALL: find_Horner_MCTS", thetime_str().c_str());
2220#endif
2221
2222 GETIDENTITY;
2223
2224 LONG start_time = TimeWallClock(1);
2225
2226 // initialize the used global variables
2227 mcts_expr_score = count_operators(optimize_expr);
2228 mcts_root = tree_node();
2229
2230 // extract all symbols from the expression
2231 set<WORD> var_set;
2232 for (WORD *t=optimize_expr; *t!=0; t+=*t)
2233 if (t[1] == SYMBOL)
2234 for (int i=3; i<t[2]; i+=2)
2235 var_set.insert(t[i]);
2236
2237 // check for factorized/separated expression and make sure that
2238 // FACTORSYMBOL/SEPARATESYMBOL isn't included in the MCTS
2239 mcts_factorized = var_set.count(FACTORSYMBOL);
2240 if (mcts_factorized) var_set.erase(FACTORSYMBOL);
2241 mcts_separated = var_set.count(SEPARATESYMBOL);
2242 if (mcts_separated) var_set.erase(SEPARATESYMBOL);
2243
2244 mcts_vars = vector<WORD>(var_set.begin(), var_set.end());
2245 optimize_num_vars = (int)mcts_vars.size();
2246 // initialize MCTS tree root
2247 for (int i=0; i<(int)mcts_vars.size(); i++) {
2248 if (AO.Optimize.hornerdirection != O_BACKWARD)
2249 mcts_root.childs.push_back(tree_node(+(mcts_vars[i]+1)));
2250 if (AO.Optimize.hornerdirection != O_FORWARD)
2251 mcts_root.childs.push_back(tree_node(-(mcts_vars[i]+1)));
2252 }
2253 my_random_shuffle(BHEAD mcts_root.childs.begin(), mcts_root.childs.end());
2254
2255#if defined(WITHMPI)
2256 PF_find_Horner_MCTS_expand_tree_master_init();
2257#endif
2258
2259 // initialize a potential variable mctsconstant scheme.
2260 AT.optimtimes = 0;
2261
2262 // call expand_tree until it is called "mctsnumexpand" times, the
2263 // time limit is reached or the tree is fully finished
2264 for (int times=0; times<AO.Optimize.mctsnumexpand && !mcts_root.finished &&
2265 (AO.Optimize.mctstimelimit==0 || (TimeWallClock(1)-start_time)/100 < AO.Optimize.mctstimelimit);
2266 times++) {
2267 AT.optimtimes = times;
2268 // call expand_tree routine depending on threading mode
2269#if defined(WITHPTHREADS)
2270 if (AM.totalnumberofthreads > 1)
2271 find_Horner_MCTS_expand_tree_threaded();
2272 else
2273#elif defined(WITHMPI)
2274 if (PF.numtasks > 1)
2275 PF_find_Horner_MCTS_expand_tree_master();
2276 else
2277#endif
2278 find_Horner_MCTS_expand_tree();
2279 }
2280
2281 // if TForm or ParForm, wait for everyone to finish
2282#ifdef WITHPTHREADS
2283 MasterWaitAll();
2284#endif
2285#ifdef WITHMPI
2286 PF_find_Horner_MCTS_expand_tree_master_wait();
2287#endif
2288#ifdef DEBUG
2289 MesPrint ("*** [%s, w=%w] DONE: find_Horner_MCTS", thetime_str().c_str());
2290#endif
2291
2292}
2293
2294/*
2295 #] find_Horner_MCTS :
2296 #[ merge_operators :
2297*/
2298
2356vector<WORD> merge_operators (const vector<WORD> &all_instr, bool move_coeff) {
2357
2358#ifdef DEBUG_MORE
2359 MesPrint ("*** [%s, w=%w] CALL: merge_operators", thetime_str().c_str());
2360#endif
2361
2362 // get starting positions of instructions
2363 vector<const WORD *> instr;
2364 const WORD *tbegin = &*all_instr.begin();
2365 const WORD *tend = tbegin+all_instr.size();
2366 // copy all instructions to temp space. There will be n of them in instr.
2367 for (const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
2368 instr.push_back(t);
2369 }
2370 int n = instr.size();
2371
2372 // find parents and number of parents of instructions
2373 vector<int> par(n), numpar(n,0);
2374 for (int i=0; i<n; i++) par[i]=i;
2375
2376 for (int i=0; i<n; i++) {
2377 for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) {
2378 if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) ) {
2379 // extra symbol t[3] is referred to in instr i.
2380 par[*(t+3)]=i;
2381 numpar[*(t+3)]++;
2382
2383 // if coefficient isn't +/-1 or power > 1, increase numpar,
2384 // so that this is not merged
2385 if (*(t+*t-3)!=1 || *(t+*t-2)!=1 || ABS(*(t+*t-1))!=3) numpar[*(t+3)]++;
2386 if (*(t+4)>1) numpar[*(t+3)]++;
2387 }
2388 }
2389 }
2390 // determine which instructions to merge
2391 stack<int> s;
2392 s.push(n-1);
2393 vector<bool> vis(n,false);
2394
2395 while (!s.empty()) {
2396
2397 int i=s.top(); s.pop();
2398 if (vis[i]) continue;
2399 vis[i]=true;
2400
2401 for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t)
2402 if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) )
2403 s.push(*(t+3));
2404
2405 // condition: one parent and equal operator as parent
2406 if (numpar[i]==1 && *(instr[i]+1)==*(instr[par[i]]+1))
2407 par[i] = par[par[i]]; // The expr into which we subst par[i] to get i
2408 else
2409 par[i] = i;
2410 }
2411
2412 // merge instructions into new instructions
2413 vector<WORD> newinstr;
2414
2415 // stack of new expressions, all 0-indexed
2416 stack<WORD> new_expr;
2417 new_expr.push(n-1);
2418 vis = vector<bool>(n,false);
2419
2420 // skip empty equations (might be introduced by greedy optimizations)
2421 vector<bool> skip(n,false), skipcoeff(n,false);
2422 for (int i=0; i<n; i++)
2423 if (*(instr[i]+OPTHEAD) == 0) skip[i]=skipcoeff[i]=true;
2424
2425 // for renumbering merged parents
2426 vector<int> renum_par(n);
2427 for (int i=0; i<n; i++)
2428 renum_par[i]=i;
2429
2430 while (!new_expr.empty()) {
2431 int x = new_expr.top(); new_expr.pop();
2432 if (vis[x]) continue;
2433 vis[x] = true;
2434
2435 // find all instructions with parent=x and copy their arguments
2436 // into a new expression
2437 bool first_copy=true;
2438 int lenidx = newinstr.size()+2;
2439
2440 // 1-indexed, since signs may occur
2441 stack<WORD> this_expr;
2442 this_expr.push(x+1);
2443
2444 while (!this_expr.empty()) {
2445 // pop from stack, determine expr.nr and sign
2446 int i = this_expr.top(); this_expr.pop();
2447 int sign = SGN(i);
2448 i = ABS(i)-1;
2449 for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) { // terms in i
2450 // don't copy a term if:
2451 // (1) skip=true, since then it's merged into the parent
2452 // (2) extrasymbol with parent=x, because its children should be copied
2453 // (3) coefficient with skipcoeff=true, since it's already copied
2454 bool copy = !skip[i];
2455 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
2456 if (par[*(t+3)] == x) {
2457 // parent of term refers to x. we push it with its sign if no skip is true
2458 // and the sign of the expr.
2459 this_expr.push(sign * (skip[i]||skipcoeff[i] ? 1 : SGN(*(t+*t-1))) * (1+*(t+3)));
2460 if (*(instr[i]+1) == OPER_MUL) sign=1;
2461 copy=false;
2462 }
2463 else {
2464 new_expr.push(*(t+3));
2465 }
2466 }
2467
2468 if (*t == 1+ABS(*(t+*t-1)) && skipcoeff[i]) {
2469 copy=false;
2470 }
2471
2472 if (copy) {
2473 // first term, so add header
2474 if (first_copy) {
2475 newinstr.push_back(renum_par[x]); // expr.nr.
2476 newinstr.push_back(*(instr[x]+1)); // operator
2477 newinstr.push_back(3); // length OPTHEAD?
2478 first_copy=false;
2479 }
2480
2481 // copy term and adjust sign
2482 int thislenidx = newinstr.size();
2483 newinstr.insert(newinstr.end(), t, t+*t); // Put the whole term in newinstr
2484 newinstr.back() *= sign;
2485 if (*(instr[i]+1) == OPER_MUL) sign=1;
2486 newinstr[lenidx] += *t;
2487
2488 // check for moving coefficients up
2489 // necessary condition: MUL-expression with 1 parent
2490 if (move_coeff && *t!=1+ABS(*(t+*t-1)) && *(instr[i]+1)!=OPER_COMMA &&
2491 *(t+1)==EXTRASYMBOL && numpar[*(t+3)]==1 && *(instr[*(t+3)]+1)==OPER_MUL) {
2492
2493 // coefficient is always the first term (that's how Horner+generate works)
2494 const WORD *t1 = instr[*(t+3)]+OPTHEAD;
2495 const WORD *t2 = t1+*t1;
2496
2497 if (*t1 == 1+ABS(*(t1+*t1-1))) {
2498 // t1 pointer to a coefficient, so move it
2499
2500 // remove old coefficient of 1
2501 WORD *t3 = &*newinstr.end(); //
2502 int sign2 = SGN(t3[-1]); //
2503 newinstr.erase(newinstr.end()-3, newinstr.end());
2504 // count number of arguments; iff it is 2 move the (extra)symbol too
2505 int numargs=0;
2506 for (const WORD *tt=t1; *tt!=0; tt+=*tt) {
2507 numargs++;
2508 }
2509 if (numargs==2 && *(t2+4)==1) {
2510 // replace (extra)symbol
2511 newinstr[newinstr.size()-4] = *(t2+1);
2512 newinstr[newinstr.size()-3] = *(t2+2);
2513 newinstr[newinstr.size()-2] = *(t2+3);
2514 newinstr[newinstr.size()-1] = *(t2+4);
2515 sign2 *= SGN(*(t2+*t2-1)); // was t2[7]
2516
2517 // ignore this expression from now on
2518 skip[*(t+3)]=true;
2519 if (*(t2+1)==EXTRASYMBOL)
2520 renum_par[*(t+3)] = *(t2+3);
2521 }
2522 else {
2523 // otherwise, ignore coefficient from now on
2524 // we need to collect the signs of the terms
2525 // first and set them to one. This was forgotten
2526 // before. Gave occasional errors.
2527 if ( numargs > 2 || ( numargs == 2 && t2[4] > 1 ) ) {
2528 for (WORD *tt=(WORD *)t2; *tt!=0; tt+=*tt) {
2529 if ( tt[*tt-1] < 0 ) {
2530 tt[*tt-1] = -tt[*tt-1];
2531 sign2 = -sign2;
2532 }
2533 }
2534 }
2535 skipcoeff[*(t+3)]=true;
2536 }
2537
2538 // add new coefficient
2539 newinstr.insert(newinstr.end(), t1+1, t1+*t1);
2540 newinstr.back() *= sign2;
2541 newinstr[thislenidx] += ABS(newinstr.back()) - 3;
2542 newinstr[lenidx] += ABS(newinstr.back()) - 3;
2543 }
2544 }
2545 }
2546 }
2547 }
2548
2549 // if something has been copied, add trailing zero
2550 if (!first_copy) {
2551 newinstr.push_back(0);
2552 newinstr[lenidx]++;
2553 }
2554 }
2555
2556 // renumber the expressions to 0,1,2,..,; only keep expressions with
2557 // skip=false which are their own parent after a renumbering in case
2558 // of moved coefficients
2559
2560 // find renumber scheme
2561 vector<int> renum(n,-1);
2562 int next=0;
2563 for (int i=0; i<n; i++)
2564 if (!skip[i] && renum_par[par[i]]==i) renum[renum_par[i]]=next++;
2565
2566 // find new instruction index
2567 tbegin = &*newinstr.begin();
2568 tend = tbegin+newinstr.size();
2569 for (const WORD *t=tbegin; t!=tend; t+=*(t+2))
2570 instr[renum[*t]] = t;
2571
2572 // renumbering expressions and sort them lexicographically (in
2573 // new_instr they are in the preorder of the expression tree)
2574 vector<WORD> sortinstr;
2575 for (int i=0; i<next; i++) {
2576 int idx = sortinstr.size();
2577 sortinstr.insert(sortinstr.end(), instr[i], instr[i]+*(instr[i]+2));
2578
2579 sortinstr[idx] = renum[sortinstr[idx]];
2580
2581 // renumber content of an expression
2582 for (WORD *t2=&sortinstr[idx]+3; *t2!=0; t2+=*t2)
2583 if (*t2!=1+ABS(*(t2+*t2-1)) && *(t2+1)==EXTRASYMBOL)
2584 *(t2+3) = renum[*(t2+3)];
2585 }
2586
2587#ifdef DEBUG_MORE
2588 MesPrint ("*** [%s, w=%w] DONE: merge_operators", thetime_str().c_str());
2589#endif
2590
2591 return sortinstr;
2592}
2593
2594/*
2595 #] merge_operators :
2596 #[ class Optimization :
2597*/
2598
2624public:
2625 int type, arg1, arg2, improve;
2626 vector<WORD> coeff;
2627 vector<int> eqnidxs;
2628
2629 bool operator< (const optimization &a) const {
2630 if (arg1 != a.arg1) return arg1 < a.arg1;
2631 if (arg2 != a.arg2) return arg2 < a.arg2;
2632 if (type != a.type) return type < a.type;
2633 return coeff < a.coeff;
2634 }
2635};
2636
2637/*
2638 #] class Optimization :
2639 #[ find_optimizations :
2640*/
2641
2651vector<optimization> find_optimizations (const vector<WORD> &instr) {
2652
2653#ifdef DEBUG_GREEDY
2654 MesPrint ("*** [%s, w=%w] CALL: find_optimizations", thetime_str().c_str());
2655#endif
2656
2657// #[ Startup :
2658 // the resulting vector of optimizations
2659 vector<optimization> res;
2660
2661 // a map to count the improvement of an optimization; the
2662 // improvement is stored as a vector<int> with equation numbers
2663 map<optimization, vector<int> > cnt;
2664
2665 // a map to identify coefficients
2666 map<vector<int>,int> idx_coeff;
2667
2668 const WORD *ebegin = &*instr.begin();
2669 const WORD *eend = ebegin+instr.size();
2670
2671 for (int optim_type=0; optim_type<=4; optim_type++) {
2672
2673 cnt.clear();
2674 idx_coeff.clear();
2675
2676 optimization optim;
2677 optim.type = optim_type;
2678// #] Startup :
2679
2680// #[ type 0 : find optimizations of the form z=x^n (optim.type==0)
2681 if (optim_type == 0) {
2682 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2683 if (*(e+1) != OPER_MUL) continue;
2684 for (const WORD *t=e+3; *t!=0; t+=*t) {
2685 if (*t == ABS(*(t+*t-1))+1) continue;
2686 if (*(t+4) > 1) {
2687 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2688 optim.arg2 = *(t+4);
2689 cnt[optim].push_back(e-ebegin);
2690 }
2691 }
2692 }
2693 }
2694// #] type 0 :
2695// #[ type 1 : find optimizations of the form z=x*y (optim.type==1)
2696 if (optim_type == 1) {
2697 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2698 if (*(e+1) != OPER_MUL) continue;
2699
2700 for (const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2701 if (*t1 == ABS(*(t1+*t1-1))+1) continue;
2702 int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2703
2704 for (const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2705 if (*t2 == ABS(*(t2+*t2-1))+1) continue;
2706 int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2707
2708 if (*(t1+4) == *(t2+4)) {
2709 optim.arg1 = x1;
2710 optim.arg2 = x2;
2711 if (optim.arg1 > optim.arg2)
2712 swap(optim.arg1, optim.arg2);
2713
2714 if (*(t1+4) == 1)
2715 cnt[optim].push_back(e-ebegin);
2716 else {
2717 // E=x^n*y^n -> z=x*y; E=z^n is double improvement
2718 cnt[optim].push_back(e-ebegin);
2719 cnt[optim].push_back(e-ebegin);
2720 }
2721 }
2722 }
2723 }
2724 }
2725 }
2726// #] type 1 :
2727// #[ type 2 : find optimizations of the form z=c*x (optim.type==2)
2728 if (optim_type == 2) {
2729 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2730
2731 if (*(e+1) == OPER_ADD) {
2732 // in ADD-equation
2733
2734 for (const WORD *t=e+3; *t!=0; t+=*t) {
2735 if (*t == ABS(*(t+*t-1))+1) continue;
2736
2737 if (*(t+4)==1) {
2738 if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1) continue;
2739
2740 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2741 optim.coeff.back() = ABS(optim.coeff.back());
2742
2743 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2744 optim.arg2 = 0;
2745
2746 cnt[optim].push_back(e-ebegin);
2747 }
2748 }
2749 }
2750 else if (*(e+1) == OPER_MUL) {
2751 // in MUL-equation
2752 optim.coeff.clear();
2753
2754 for (const WORD *t=e+3; *t!=0; t+=*t)
2755 if (*t == ABS(*(t+*t-1))+1) {
2756 if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1) continue;
2757 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2758 optim.coeff.back() = ABS(optim.coeff.back());
2759 }
2760
2761 if (!optim.coeff.empty())
2762 for (const WORD *t=e+3; *t!=0; t+=*t) {
2763 if (*t == ABS(*(t+*t-1))+1) continue;
2764 if (*(t+4) != 1) continue;
2765 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2766 optim.arg2 = 0;
2767 cnt[optim].push_back(e-ebegin);
2768 }
2769 }
2770 }
2771 }
2772// #] type 2 :
2773// #[ type 3 : find optimizations of the form z=x+c (optim.type==3)
2774 if (optim_type == 3) {
2775 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2776
2777 if (*(e+1) != OPER_ADD) continue;
2778
2779 optim.coeff.clear();
2780
2781 for (const WORD *t=e+3; *t!=0; t+=*t)
2782 if (*t == ABS(*(t+*t-1))+1) {
2783 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2784 }
2785
2786 if (!optim.coeff.empty()) {
2787 for (const WORD *t=e+3; *t!=0; t+=*t) {
2788 if (*t == ABS(*(t+*t-1))+1) continue;
2789 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) continue;
2790 if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2791
2792 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2793 optim.arg2 = 0;
2794 cnt[optim].push_back(e-ebegin);
2795 if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2796 }
2797 }
2798 }
2799 }
2800// #] type 3 :
2801// #[ type 4,5 : find optimizations of the form z=x+y or z=x-y (optim.type==4 or 5)
2802 if (optim_type == 4) {
2803 for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2804 if (*(e+1) != OPER_ADD) continue;
2805
2806 for (const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2807 if (*t1 == ABS(*(t1+*t1-1))+1) continue;
2808 int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2809
2810 for (const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2811 if (*t2 == ABS(*(t2+*t2-1))+1) continue;
2812 int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2813
2814 int sign1 = SGN(*(t1+*t1-1));
2815 int sign2 = SGN(*(t2+*t2-1));
2816
2817 if (BigLong((UWORD *)t1+5, ABS(*(t1+*t1-1))-1, (UWORD *)t2+5, ABS(*(t2+*t2-1))-1) == 0) {
2818
2819 optim.type = (sign1 * sign2 == 1 ? 4 : 5); // optimization type
2820 optim.arg1 = x1;
2821 optim.arg2 = x2;
2822 if (optim.arg1 > optim.arg2) {
2823 swap(optim.arg1, optim.arg2);
2824 }
2825
2826 if (ABS(*(t1+*t1-1))==3 && *(t1+*t1-2)==1 && *(t1+*t1-3)==1)
2827 cnt[optim].push_back(e-ebegin);
2828 else {
2829 // E=2x+2y -> z=x+y; E=2z is improvement bby itself
2830 cnt[optim].push_back(e-ebegin);
2831 cnt[optim].push_back(e-ebegin);
2832 }
2833 }
2834 }
2835 }
2836 }
2837 }
2838// #] type 4,5 :
2839// #[ add :
2840
2841 // add optimizations with positive improvement to the result
2842 for (map<optimization, vector<int> >::iterator i=cnt.begin(); i!=cnt.end(); i++) {
2843 int improve = i->second.size() - 1;
2844 if (improve > 0) {
2845 res.push_back(i->first);
2846 res.back().improve = improve;
2847 res.back().eqnidxs = i->second;
2848
2849 // remove duplicates, that were add to get the correct improvement
2850 res.back().eqnidxs.erase(unique(res.back().eqnidxs.begin(), res.back().eqnidxs.end()), res.back().eqnidxs.end());
2851 }
2852 }
2853 }
2854// #] add :
2855
2856#ifdef DEBUG_GREEDY
2857 MesPrint ("*** [%s, w=%w] DONE: find_optimizations",thetime_str().c_str());
2858#endif
2859
2860 return res;
2861}
2862
2863/*
2864 #] find_optimizations :
2865 #[ do_optimization :
2866*/
2867
2884bool do_optimization (const optimization optim, vector<WORD> &instr, int newid) {
2885
2886// #[ Debug code :
2887#ifdef DEBUG_GREEDY
2888 if (optim.type==0)
2889 MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d^%d)",
2890 thetime_str().c_str(), optim.improve,
2891 optim.arg1>0?'x':'Z', ABS(optim.arg1)-1, optim.arg2);
2892 else if (optim.type==1 || optim.type>=4)
2893 MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%c%d)",
2894 thetime_str().c_str(), optim.improve,
2895 optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
2896 optim.type==1 ? '*' : optim.type==4 ? '+' : '-',
2897 optim.arg2>0?'x':'Z', ABS(optim.arg2)-1);
2898 else {
2899 WORD n = optim.coeff.back()/2;
2900 UBYTE num[BITSINWORD*ABS(n)], den[BITSINWORD*ABS(n)];
2901 PrtLong((UWORD *)&optim.coeff[0], n, num);
2902 PrtLong((UWORD *)&optim.coeff[ABS(n)], ABS(n), den);
2903 MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%s/%s)",
2904 thetime_str().c_str(), optim.improve,
2905 optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
2906 optim.type==2 ? '*' : '+', num,den);
2907 }
2908#endif
2909// #] Debug code :
2910
2911 bool substituted = false;
2912 WORD *ebegin = &*instr.begin();
2913
2914// #[ type 0 : substitution of the form z=x^n (optim.type==0)
2915 if (optim.type == 0) {
2916
2917 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
2918 int varnumx = ABS(optim.arg1) - 1;
2919 int n = optim.arg2;
2920
2921 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
2922 WORD *e = ebegin + optim.eqnidxs[i];
2923 if (*(e+1) != OPER_MUL) continue;
2924
2925 // scan through equation
2926 for (WORD *t=e+3; *t!=0; t+=*t) {
2927 if (*t == ABS(*(t+*t-1))+1) continue;
2928 if (*(t+1)==vartypex &&
2929 *(t+3)==varnumx &&
2930 *(t+4) % n == 0) {
2931
2932 // substitute
2933 *(t+1) = EXTRASYMBOL;
2934 *(t+3) = newid;
2935 *(t+4) /= n;
2936
2937 substituted = true;
2938 }
2939 }
2940 }
2941
2942 if (!substituted) {
2943#ifdef DEBUG_GREEDY
2944 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
2945#endif
2946 return false;
2947 }
2948
2949 // add extra equation (Tnew = x^n)
2950 instr.push_back(newid); // eqn.nr
2951 instr.push_back(OPER_MUL); // operator
2952 instr.push_back(12); // total length
2953 instr.push_back(8); // term length
2954 instr.push_back(vartypex); // (extra)symbol
2955 instr.push_back(4); // symbol length
2956 instr.push_back(varnumx); // symbol id
2957 instr.push_back(n); // power
2958 instr.push_back(1);
2959 instr.push_back(1); // coeffient 1
2960 instr.push_back(3);
2961 instr.push_back(0); // trailing 0
2962 }
2963// #] type 0 :
2964// #[ type 1 : substitution of the form z=x*y (optim.type==1)
2965 if (optim.type == 1) {
2966
2967 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
2968 int varnumx = ABS(optim.arg1) - 1;
2969 int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
2970 int varnumy = ABS(optim.arg2) - 1;
2971
2972 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
2973 WORD *e = ebegin + optim.eqnidxs[i];
2974 if (*(e+1) != OPER_MUL) continue;
2975
2976 // scan through equation
2977 int powx=0, powy=0;
2978 for (WORD *t=e+3; *t!=0; t+=*t) {
2979 if (*t == ABS(*(t+*t-1))+1) continue;
2980 if (*(t+1)==vartypex && *(t+3)==varnumx) powx = *(t+4);
2981 if (*(t+1)==vartypey && *(t+3)==varnumy) powy = *(t+4);
2982 }
2983
2984 // substitute if found
2985 if (powx>0 && powy>0 && powx==powy) {
2986
2987 WORD sign = 1;
2988 WORD *newt = e+3;
2989
2990 for (WORD *t=e+3; *t!=0;) {
2991 int dt=*t;
2992
2993 if (*t == ABS(*(t+*t-1))+1 ||
2994 (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
2995 !(*(t+1)==vartypey && *(t+3)==varnumy))) {
2996 memmove(newt, t, *t*sizeof(WORD));
2997 newt += dt;
2998 }
2999 else {
3000 sign *= SGN(*(t+*t-1));
3001 }
3002
3003 t+=dt;
3004 }
3005
3006 *newt++ = 8; // term length
3007 *newt++ = EXTRASYMBOL; // extrasymbol
3008 *newt++ = 4; // symbol length
3009 *newt++ = newid; // symbol id
3010 *newt++ = powx; // power
3011 *newt++ = 1;
3012 *newt++ = 1; // coefficient +/-1
3013 *newt++ = 3*sign;
3014 *newt++ = 0; // trailing 0
3015
3016 substituted = true;
3017 }
3018 }
3019
3020 if (!substituted) {
3021#ifdef DEBUG_GREEDY
3022 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3023#endif
3024 return false;
3025 }
3026
3027 // add extra equation (Tnew = x*y)
3028 instr.push_back(newid); // eqn.nr
3029 instr.push_back(OPER_MUL); // operator
3030 instr.push_back(20); // total length
3031 instr.push_back(8); // LHS length
3032 instr.push_back(vartypex); // (extra)symbol
3033 instr.push_back(4); // symbol length
3034 instr.push_back(varnumx); // symbol id
3035 instr.push_back(1); // power 1
3036 instr.push_back(1);
3037 instr.push_back(1); // coefficient 1
3038 instr.push_back(3);
3039 instr.push_back(8); // RHS length
3040 instr.push_back(vartypey); // (extra)symbol
3041 instr.push_back(4); // symbol length
3042 instr.push_back(varnumy); // symbol id
3043 instr.push_back(1); // power 1
3044 instr.push_back(1);
3045 instr.push_back(1); // coefficient 1
3046 instr.push_back(3);
3047 instr.push_back(0); // trailing 0
3048 }
3049// #] type 1 :
3050// #[ type 2 : substitution of the form z=c*x (optim.type==2)
3051
3052 if (optim.type == 2) {
3053 int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3054 int varnum = ABS(optim.arg1) - 1;
3055
3056 WORD ncoeff = optim.coeff.back();
3057
3058 // scan through equations
3059 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3060 WORD *e = ebegin + optim.eqnidxs[i];
3061
3062 if (*(e+1) == OPER_ADD) {
3063 // scan through ADD-equation
3064 for (WORD *t=e+3; *t!=0; t+=*t) {
3065
3066 if (*t == ABS(*(t+*t-1))+1) continue;
3067
3068 if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1 &&
3069 BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3070 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3071 // substitute
3072
3073 int sign = SGN(*(t+*t-1));
3074
3075 WORD *tend = t;
3076 while (*tend!=0) tend+=*tend;
3077 WORD nmove = tend - t - *t;
3078 memmove(t, t+*t, nmove*sizeof(WORD));
3079 t += nmove;
3080
3081 *t++ = 8; // term length
3082 *t++ = EXTRASYMBOL; // (extra)symbol
3083 *t++ = 4; // symbol length
3084 *t++ = newid; // symbol id
3085 *t++ = 1; // power of 1
3086 *t++ = 1;
3087 *t++ = 1; // coefficient of +/-1
3088 *t++ = 3 * sign;
3089 *t++ = 0; // trailing 0
3090
3091 substituted = true;
3092 break;
3093 }
3094 }
3095 }
3096 else if (*(e+1) == OPER_MUL) {
3097
3098 bool coeff_match=false, var_match=false;
3099 int sign = 1;
3100
3101 // scan through MUL-equation
3102 for (WORD *t=e+3; *t!=0; t+=*t) {
3103 if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3104 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3105 coeff_match = true;
3106 sign *= SGN(*(t+*t-1));
3107 }
3108 else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1) {
3109 var_match = true;
3110 sign *= SGN(*(t+*t-1));
3111 }
3112 }
3113
3114 // substitute if found
3115 if (coeff_match && var_match) {
3116
3117 WORD *newt = e+3;
3118
3119 for (WORD *t=e+3; *t!=0;) {
3120
3121 int dt=*t;
3122
3123 if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3124 memmove(newt, t, dt*sizeof(WORD));
3125 newt += dt;
3126 }
3127
3128 t+=dt;
3129 }
3130
3131 *newt++ = 8; // term length
3132 *newt++ = EXTRASYMBOL; // extrasymbol
3133 *newt++ = 4; // symbol length
3134 *newt++ = newid; // symbol id
3135 *newt++ = 1; // power of 1
3136 *newt++ = 1;
3137 *newt++ = 1; // coefficient of +/-1
3138 *newt++ = 3 * sign;
3139 *newt++ = 0; // trailing 0
3140
3141 substituted = true;
3142 }
3143 }
3144 }
3145
3146 if (!substituted) {
3147#ifdef DEBUG_GREEDY
3148 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3149#endif
3150 return false;
3151 }
3152
3153 // add extra equation (Tnew = c*y)
3154 instr.push_back(newid); // eqn.nr
3155 instr.push_back(OPER_ADD); // operator
3156 instr.push_back(9+ABS(ncoeff)); // total length
3157 instr.push_back(5+ABS(ncoeff)); // term length
3158 instr.push_back(vartype); // (extra)symbol
3159 instr.push_back(4); // symbol length
3160 instr.push_back(varnum); // symbol id
3161 instr.push_back(1); // power of 1
3162 for (int i=0; i<ABS(ncoeff); i++) // coefficient
3163 instr.push_back(optim.coeff[i]);
3164 instr.push_back(0); // trailing 0
3165 }
3166// #] type 2 :
3167// #[ type 3 : substitution of the form z=x+c (optim.type==3)
3168 if (optim.type == 3) {
3169 int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3170 int varnum = ABS(optim.arg1) - 1;
3171
3172 WORD ncoeff = optim.coeff.back();
3173
3174 // scan through equation
3175 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3176 WORD *e = ebegin + optim.eqnidxs[i];
3177
3178 if (*(e+1) != OPER_ADD) continue;
3179
3180 int coeff_match=0, var_match=0;
3181
3182 for (WORD *t=e+3; *t!=0; t+=*t) {
3183 if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ABS(ncoeff)-1,
3184 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0)
3185 coeff_match = SGN(ncoeff) * SGN(*(t+*t-1));
3186 else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)
3187 var_match = SGN(*(t+7));
3188 }
3189
3190 // substitute if found (x+c and -x-c and matches)
3191 if (coeff_match * var_match == 1) {
3192
3193 WORD *newt = e+3;
3194
3195 for (WORD *t=e+3; *t!=0;) {
3196 int dt=*t;
3197 if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3198 memmove(newt, t, dt*sizeof(WORD));
3199 newt += dt;
3200 }
3201 t+=dt;
3202 }
3203
3204 *newt++ = 8; // term length
3205 *newt++ = EXTRASYMBOL; // extrasymbol
3206 *newt++ = 4; // symbol length
3207 *newt++ = newid; // symbol id
3208 *newt++ = 1; // power of 1
3209 *newt++ = 1;
3210 *newt++ = 1; // coefficient of +/-1
3211 *newt++ = 3*coeff_match;
3212 *newt++ = 0; // trailing zero
3213
3214 substituted = true;
3215 }
3216 }
3217
3218 if (!substituted) {
3219#ifdef DEBUG_GREEDY
3220 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3221#endif
3222 return false;
3223 }
3224
3225 // add extra equation (Tnew = x+c)
3226 instr.push_back(newid); // eqn.nr
3227 instr.push_back(OPER_ADD); // operator
3228 instr.push_back(13+ABS(ncoeff)); // total length
3229 instr.push_back(8); // x-term length
3230 instr.push_back(vartype); // (extra)symbol
3231 instr.push_back(4); // symbol length
3232 instr.push_back(varnum); // symbol id
3233 instr.push_back(1); // power of 1
3234 instr.push_back(1);
3235 instr.push_back(1); // coefficient of 1
3236 instr.push_back(3);
3237 instr.push_back(ABS(ncoeff)+1); // c-term length
3238 for (int i=0; i<ABS(ncoeff); i++) // coefficient
3239 instr.push_back(optim.coeff[i]);
3240 instr.push_back(0); // trailing zero
3241 }
3242// #] type 3 :
3243// #[ type 4,5 : substitution of the form z=x+y or z=x-y (optim.type=4 or 5)
3244 if (optim.type >= 4) {
3245
3246 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3247 int varnumx = ABS(optim.arg1) - 1;
3248 int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
3249 int varnumy = ABS(optim.arg2) - 1;
3250
3251 // scan through equations
3252 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3253 WORD *e = ebegin + optim.eqnidxs[i];
3254
3255 if (*(e+1) != OPER_ADD) continue;
3256
3257 const WORD *coeffx=NULL, *coeffy=NULL;
3258 WORD ncoeffx=0,ncoeffy=0;
3259
3260 // looks for terms
3261 for (WORD *t=e+3; *t!=0; t+=*t) {
3262 if (*t == ABS(*(t+*t-1))+1) continue; // constant
3263 if (*(t+1)==vartypex && *(t+3)==varnumx && *(t+4)==1) {
3264 coeffx = t+5;
3265 ncoeffx = *(t+*t-1);
3266 }
3267 if (*(t+1)==vartypey && *(t+3)==varnumy && *(t+4)==1) {
3268 coeffy = t+5;
3269 ncoeffy = *(t+*t-1);
3270 }
3271 }
3272
3273 // check signs (type=4: x+y and -x-y, type=5: x-y and -x+y) ??????
3274 // check signs (type=4: x+y, type=5: x-y) !!!!!!!!!!
3275 if (SGN(ncoeffx) * SGN(ncoeffy) * (optim.type==4 ? 1 : -1) == 1) {
3276 // check absolute value of coeeficients
3277 if (BigLong((UWORD *)coeffx, ABS(ncoeffx)-1, (UWORD *)coeffy, ABS(ncoeffy)-1) == 0) {
3278 // substitute
3279 vector<WORD> coeff(coeffx, coeffx+ABS(ncoeffx));
3280
3281 WORD *newt = e+3;
3282/*
3283if ( optim.type == 5 ) {
3284 while ( *newt ) newt+=*newt;
3285 int i = (newt - e) - 3;
3286 MesPrint(" < %a",i,e+3);
3287 newt = e+3;
3288}
3289*/
3290 for (WORD *t=e+3; *t!=0;) {
3291 int dt=*t;
3292 if (*t == ABS(*(t+*t-1))+1 ||
3293 (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
3294 !(*(t+1)==vartypey && *(t+3)==varnumy))) {
3295 memmove(newt, t, dt*sizeof(WORD));
3296 newt += dt;
3297 }
3298 t+=dt;
3299 }
3300
3301 *newt++ = 5 + ABS(ncoeffx); // term length
3302 *newt++ = EXTRASYMBOL; // extrasymbol
3303 *newt++ = 4; // symbol length
3304 *newt++ = newid; // symbol id
3305 *newt++ = 1; // power of 1
3306 for (int j=0; j<ABS(ncoeffx); j++)// coefficient
3307 *newt++ = coeff[j];
3308 *newt++ = 0; // trailing 0
3309 substituted = true;
3310/*
3311if ( optim.type == 5 ) {
3312 int i = (newt - e) - 4;
3313 MesPrint(" > %a",i,e+3);
3314}
3315*/
3316 }
3317 }
3318 }
3319
3320 if (!substituted) {
3321#ifdef DEBUG_GREEDY
3322 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3323#endif
3324 return false;
3325 }
3326/*
3327if ( optim.type == 5 )
3328MesPrint ("improve=%d, %c%d%c%c%d)", optim.improve,
3329 optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
3330 optim.type==1 ? '*' : optim.type==4 ? '+' : '-',
3331 optim.arg2>0?'x':'Z', ABS(optim.arg2)-1);
3332*/
3333 // add extra equation (Tnew = x+/-y)
3334 instr.push_back(newid); // eqn.nr
3335 instr.push_back(OPER_ADD); // operator
3336 instr.push_back(20); // total length
3337 instr.push_back(8); // term length
3338 instr.push_back(vartypex); // (extra)symbol
3339 instr.push_back(4); // symbol length
3340 instr.push_back(varnumx); // symbol id
3341 instr.push_back(1); // power of 1
3342 instr.push_back(1);
3343 instr.push_back(1); // coefficient of 1
3344 instr.push_back(3);
3345 instr.push_back(8); // term length
3346 instr.push_back(vartypey); // (extra)symbol
3347 instr.push_back(4); // symbol length
3348 instr.push_back(varnumy); // symbol id
3349 instr.push_back(1); // power of 1
3350 instr.push_back(1);
3351 instr.push_back(1); // coefficient of +/-1
3352 instr.push_back(3*(optim.type==4?1:-1));
3353 instr.push_back(0); // trailing 0
3354 }
3355// #] type 4,5 :
3356// #[ trivial : remove trivial equations of the form Zi = +/-Zj
3357 vector<int> renum(newid+1, 0);
3358 bool do_renum=false;
3359
3360 // vector may be moved when it is extended
3361 ebegin = &*instr.begin();
3362
3363 for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3364 WORD *e = ebegin + optim.eqnidxs[i];
3365 WORD *t = e+3;
3366 if (*t==0) continue; // empty (removed) equation
3367 if (*(t+*t)!=0) continue; // more than 1 term
3368 if (*t!=8) continue; // term not of correct form
3369 if (*(t+4)!=1) continue; // power != 1
3370 if (*(t+5)!=1 || *(t+6)!=1) continue; // coeff != +/-1
3371
3372 // trivial term, so renumber this one
3373 renum[*e] = SGN(*(t+7)) * (*(t+3) + 1);
3374 do_renum = true;
3375
3376 // remove equation
3377 *t=0;
3378 }
3379// #] trivial :
3380// #[ renumbering :
3381
3382 // there are renumberings to be done, so loop through all equations
3383 if (do_renum) {
3384 WORD *eend = ebegin+instr.size();
3385
3386 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3387 for (WORD *t=e+3; *t!=0; t+=*t) {
3388 if (*t == ABS(*(t+*t-1))+1) continue;
3389 if (*(t+1)==EXTRASYMBOL && renum[*(t+3)]!=0) {
3390 *(t+*t-1) *= SGN(renum[*(t+3)]);
3391 *(t+3) = ABS(renum[*(t+3)]) - 1;
3392 }
3393 }
3394 }
3395 }
3396// #] renumbering :
3397
3398#ifdef DEBUG_GREEDY
3399 MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=true", thetime_str().c_str(), optim.improve);
3400#endif
3401
3402 return true;
3403}
3404
3405/*
3406 #] do_optimization :
3407 #[ partial_factorize :
3408*/
3409
3433int partial_factorize (vector<WORD> &instr, int n, int improve) {
3434
3435#ifdef DEBUG_GREEDY
3436 MesPrint ("*** [%s, w=%w] CALL: partial_factorize (n=%d)", thetime_str().c_str(), n);
3437#endif
3438
3439 GETIDENTITY;
3440
3441 // get starting positions of instructions
3442 vector<int> instr_idx(n);
3443 WORD *ebegin = &*instr.begin();
3444 WORD *eend = ebegin+instr.size();
3445 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3446 instr_idx[*e] = e - ebegin;
3447 }
3448
3449 // get reference counts
3450/*
3451 * The next construction replaces the vector construction which is
3452 * rather costly for valgrind (and maybe also in normal running)
3453 */
3454 int nmax = 2*n;
3455 WORD *numpar = (WORD *)Malloc1(nmax*sizeof(WORD),"numpar");
3456 for ( int i = 0; i < nmax; i++ ) numpar[i] = 0;
3457// vector<WORD> numpar(n);
3458 for (WORD *e=ebegin; e!=eend; e+=*(e+2))
3459 for (WORD *t=e+3; *t!=0; t+=*t) {
3460 if (*t == ABS(*(t+*t-1))+1) continue;
3461 if (*(t+1) == EXTRASYMBOL) numpar[*(t+3)]++;
3462 }
3463
3464 // find factorizable expressions
3465 for (int i=0; i<n; i++) {
3466 WORD *e = &*instr.begin() + instr_idx[i];
3467 if (*(e+1) != OPER_ADD) continue;
3468
3469 // count symbol occurrences
3470 map<WORD,WORD> cnt; // 1-indexed, <0:EXTRASYMBOL, >0:SYMBOL
3471
3472 for (WORD *t=e+3; *t!=0; t+=*t) {
3473 if (*t==ABS(*(t+*t-1))+1) continue;
3474
3475 // count symbols in t
3476 if (*(t+4)==1)
3477 cnt[(*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1)]++;
3478
3479 // count symbols in extrasymbols of t
3480 if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3481 WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3482 if (*(t2+1) != OPER_MUL) continue;
3483 for (t2+=3; *t2!=0; t2+=*t2) {
3484 if (*t2 == ABS(*(t2+*t2-1))+1) continue;
3485 if (*(t2+4)==1)
3486 cnt[(*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1)]++;
3487 }
3488 }
3489 }
3490
3491 // find most-occurring symbol
3492 WORD x=0, best=0;
3493 for (map<WORD,WORD>::iterator it=cnt.begin(); it!=cnt.end(); it++)
3494 if (it->second > best) { x=it->first; best=it->second; }
3495
3496 // occurrence>=2 and occurrence>improve, so factorize
3497
3498 if (best>=2 && best>improve) {
3499 // initialize new equation (Zi from example above)
3500 vector<WORD> new_eqn;
3501 new_eqn.push_back(n);
3502 new_eqn.push_back(OPER_ADD);
3503 new_eqn.push_back(0); // length
3504
3505 WORD dt;
3506 WORD *newt=e+3;
3507 for (WORD *t=e+3; *t!=0; t+=dt) {
3508 dt = *t;
3509 bool keep=true;
3510
3511 if (*t!=ABS(*(t+*t-1))+1) {
3512
3513 // factorized symbol is in t itself
3514 if (*(t+4)==1) {
3515 WORD y = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1);
3516 if (y==x) {
3517 new_eqn.push_back(*t-4);
3518 new_eqn.insert(new_eqn.end(), t+5, t+dt);
3519 keep=false;
3520 }
3521 }
3522
3523 // look in extrasymbol of t with ref.count=1
3524 if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3525 WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3526 if (*(t2+1) == OPER_MUL) {
3527 bool has_x=false;
3528 for (t2+=3; *t2!=0; t2+=*t2) {
3529 if (*t2 == ABS(*(t2+*t2-1))+1) continue;
3530 WORD y = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1);
3531 // extrasymbol has factorized symbol
3532 if (y==x && *(t2+4)==1) {
3533 has_x=true;
3534 // copy remaining part
3535 WORD *tend=t2+*t2;
3536 WORD sign = SGN(*(tend-1));
3537 while (*tend!=0) tend+=*tend;
3538 int dt2 = tend - (t2+*t2);
3539 memmove(t2, t2+*t2, (dt2+1)*sizeof(WORD));
3540 t2 += dt2;
3541 *(t2-1) *= sign;
3542 break;
3543 }
3544 }
3545 if (has_x) {
3546 // extrasymbol has x, so add it to new equation
3547 keep=false;
3548 int thisidx=new_eqn.size();
3549 new_eqn.insert(new_eqn.end(), t, t+dt);
3550 t2 = &*instr.begin() + instr_idx[*(t+3)] + 3;
3551 // if becomes trivial, substitute the term
3552 if (*(t2+*t2)==0) {
3553 // it's a number
3554 if (*t2 == ABS(*(t2+*t2-1))+1) {
3555 if (ABS(new_eqn[new_eqn.size()-1])==3 && new_eqn[new_eqn.size()-2]==1 && new_eqn[new_eqn.size()-3]==1) {
3556 // original equation has coefficient of +/-1, so replace it
3557 WORD sign = SGN(new_eqn.back());
3558 new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3559 new_eqn.insert(new_eqn.end(), t2, t2+*t2);
3560 new_eqn.back() *= sign;
3561 *t2 = 0;
3562 }
3563 else {
3564 // two non-trivial coefficients, so multiply them
3565 // note: untested code (found no way to trigger it)
3566 UWORD *tmp = NumberMalloc("partial_factorize");
3567 WORD ntmp=0;
3568 MulRat(BHEAD (UWORD *)t2+*t2-ABS(*(t2+*t2-1)), *(t2+*t2-1),
3569 (UWORD *)&*(new_eqn.end()-ABS(new_eqn.back())), new_eqn.back(),
3570 tmp, &ntmp);
3571 new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3572 new_eqn.push_back(ABS(ntmp)+1);
3573 new_eqn.insert(new_eqn.end(), tmp, tmp+ABS(ntmp));
3574 NumberFree(tmp,"partial_factorize");
3575 *t2 = 0;
3576 }
3577 }
3578 else if (*(t2+4)==1) {
3579 // it's a variable
3580 new_eqn.back() *= SGN(*(t2+*t2-1));
3581 new_eqn[thisidx+1] = *(t2+1);
3582 new_eqn[thisidx+2] = *(t2+2);
3583 new_eqn[thisidx+3] = *(t2+3);
3584 new_eqn[thisidx+4] = *(t2+4);
3585 *t2 = 0;
3586 }
3587 }
3588 }
3589 }
3590 }
3591 }
3592
3593 // no x, so copy it
3594 if (keep) {
3595 memmove(newt, t, dt*sizeof(WORD));
3596 newt += dt;
3597 }
3598 }
3599
3600 // finalize new equation
3601 new_eqn.push_back(0);
3602 new_eqn[2] = new_eqn.size();
3603
3604 bool empty = newt == e+3;
3605 if ( n+1 >= nmax ) {
3606 int i, newnmax = nmax*2;
3607 WORD *newnumpar = (WORD *)Malloc1(newnmax*sizeof(WORD),"newnumpar");
3608 for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3609 for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3610 M_free(numpar,"numpar");
3611 numpar = newnumpar;
3612 nmax = newnmax;
3613 }
3614// numpar.push_back(0);
3615 n++;
3616
3617 // if original is not empty, add new equation (Zj) to it
3618 // otherwise replace it later
3619 if (!empty) {
3620 *newt++ = 8;
3621 *newt++ = EXTRASYMBOL;
3622 *newt++ = 4;
3623 *newt++ = n;
3624 *newt++ = 1;
3625 *newt++ = 1;
3626 *newt++ = 1;
3627 *newt++ = 3;
3628 *newt++ = 0;
3629 }
3630
3631 // add new equation to instructions
3632 instr_idx.push_back(instr.size());
3633 instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3634
3635 // generate another new equation (Zj=x*Zi)
3636 new_eqn.clear();
3637 new_eqn.push_back(n);
3638 new_eqn.push_back(OPER_MUL);
3639 new_eqn.push_back(20);
3640 new_eqn.push_back(8);
3641
3642 // add factorized symbol
3643 if (x>0) {
3644 new_eqn.push_back(SYMBOL);
3645 new_eqn.push_back(4);
3646 new_eqn.push_back(x-1);
3647 new_eqn.push_back(1);
3648 }
3649 else {
3650 new_eqn.push_back(EXTRASYMBOL);
3651 new_eqn.push_back(4);
3652 new_eqn.push_back(-x-1);
3653 new_eqn.push_back(1);
3654 }
3655 new_eqn.push_back(1);
3656 new_eqn.push_back(1);
3657 new_eqn.push_back(3);
3658 new_eqn.push_back(8);
3659 // add new equation (Zi)
3660 new_eqn.push_back(EXTRASYMBOL);
3661 new_eqn.push_back(4);
3662 new_eqn.push_back(n-1);
3663 new_eqn.push_back(1);
3664 new_eqn.push_back(1);
3665 new_eqn.push_back(1);
3666 new_eqn.push_back(3);
3667 new_eqn.push_back(0);
3668
3669 if (!empty) {
3670 // add new equation (Zj) to instructions
3671 instr_idx.push_back(instr.size());
3672 instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3673 if ( n+1 >= nmax ) {
3674 int i, newnmax = nmax*2;
3675 WORD *newnumpar = (WORD *)Malloc1(newnmax*sizeof(WORD),"newnumpar");
3676 for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3677 for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3678 M_free(numpar,"numpar");
3679 numpar = newnumpar;
3680 nmax = newnmax;
3681 }
3682// numpar.push_back(0);
3683 n++;
3684 }
3685 else {
3686 // replace e with Zj
3687 e = &*instr.begin() + instr_idx[i];
3688 e[1] = OPER_MUL;
3689 memcpy(e+3, &new_eqn[3], (new_eqn.size()-3)*sizeof(WORD));
3690 }
3691
3692 // decrease i, so this expression is factorized again if possible
3693 i--;
3694 }
3695 }
3696
3697#ifdef DEBUG_GREEDY
3698 MesPrint ("*** [%s, w=%w] DONE: partial_factorize (n=%d)", thetime_str().c_str(), n);
3699#endif
3700 M_free(numpar,"numpar");
3701 return n;
3702}
3703
3704/*
3705 #] partial_factorize :
3706 #[ optimize_greedy :
3707*/
3708
3727vector<WORD> optimize_greedy (vector<WORD> instr, LONG time_limit) {
3728
3729#ifdef DEBUG
3730 int old_num_oper = count_operators(instr);
3731 MesPrint ("*** [%s, w=%w] CALL: optimize_greedy(numoper=%d)",
3732 thetime_str().c_str(), old_num_oper);
3733#endif
3734
3735 LONG start_time = TimeWallClock(1);
3736
3737 WORD *ebegin = &*instr.begin();
3738 WORD *eend = ebegin+instr.size();
3739
3740 // store final equation, since it must be the last equation later
3741 int final_eqn_idx = 0;
3742 int next_eqn = 0;
3743
3744 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3745 next_eqn = *e + 1;
3746 final_eqn_idx = e-ebegin;
3747 }
3748 // optimize instructions
3749 while (TimeWallClock(1)-start_time < time_limit) {
3750 int old_next_eqn = next_eqn;
3751
3752 // find optimizations
3753 vector<optimization> optim = find_optimizations(instr);
3754
3755 // add_eqnidxs contains modified equations, which might have to be updated later again
3756 vector<int> add_eqnidxs;
3757
3758 // number of optimizations to do
3759 int num_do_optims = max(AO.Optimize.greedyminnum, (int)optim.size()*AO.Optimize.greedymaxperc/100);
3760
3761 // if best improvement is one, do all optimizations
3762 int best_improve=0;
3763 for (int i=0; i<(int)optim.size(); i++)
3764 best_improve = max(best_improve, optim[i].improve);
3765 if (best_improve <= 1)
3766 num_do_optims = MAXPOSITIVE;
3767 // do a number of optimizations
3768 while (optim.size() > 0 && num_do_optims-- > 0) {
3769
3770 // find best optimization
3771 int best=0;
3772 best_improve=0;
3773 for (int i=0; i<(int)optim.size(); i++)
3774 if (optim[i].improve > best_improve) {
3775 best=i;
3776 best_improve=optim[i].improve;
3777 }
3778
3779 // add extra equations
3780 for (int i=0; i<(int)add_eqnidxs.size(); i++)
3781 optim[best].eqnidxs.push_back(add_eqnidxs[i]);
3782
3783 // do optimization, update next_eqn if successful
3784 int next_idx = instr.size();
3785 if (do_optimization(optim[best], instr, next_eqn)) {
3786 next_eqn++;
3787 add_eqnidxs.push_back(next_idx);
3788 }
3789
3790 optim.erase(optim.begin()+best);
3791 }
3792
3793 // partially factorize with improve >= best_improve
3794 next_eqn = partial_factorize(instr, next_eqn, best_improve);
3795
3796 // check whether nothing has changed
3797 if (next_eqn == old_next_eqn) break;
3798 }
3799
3800 // add final equation to the back (must be by definition)
3801 instr.push_back(next_eqn);
3802 instr.insert(instr.end(), instr.begin()+final_eqn_idx+1, instr.begin()+final_eqn_idx+instr[final_eqn_idx+2]);
3803
3804 // removed original final equation
3805 instr[final_eqn_idx+3] = 0;
3806
3807 // remove extra zeroes
3808 WORD *t = &instr[0];
3809
3810 ebegin = &*instr.begin();
3811 eend = ebegin+instr.size();
3812 int de=0;
3813
3814 for (WORD *e=ebegin; e!=eend; e+=de) {
3815 de = *(e+2);
3816 int n=3;
3817 while (*(e+n) != 0) n+=*(e+n);
3818 n++;
3819 memmove (t, e, n*sizeof(WORD));
3820 *(t+2) = n;
3821 t += n;
3822 }
3823
3824 instr.resize(t - &instr[0]);
3825
3826#ifdef DEBUG
3827 MesPrint ("*** [%s, w=%w] DONE: optimize_greedy(numoper=%d) : numoper=%d",
3828 thetime_str().c_str(), old_num_oper, count_operators(instr));
3829#endif
3830
3831 return instr;
3832}
3833
3834/*
3835 #] optimize_greedy :
3836 #[ recycle_variables :
3837*/
3838
3867vector<WORD> recycle_variables (const vector<WORD> &all_instr) {
3868
3869#ifdef DEBUG_MORE
3870 MesPrint ("*** [%s, w=%w] CALL: recycle_variables", thetime_str().c_str());
3871#endif
3872
3873 // get starting positions of instructions
3874 vector<const WORD *> instr;
3875 const WORD *tbegin = &*all_instr.begin();
3876 const WORD *tend = tbegin+all_instr.size();
3877 for (const WORD *t=tbegin; t!=tend; t+=*(t+2))
3878 instr.push_back(t);
3879 int n = instr.size();
3880
3881 // determine with expressions are connected, how many intermediate
3882 // are needed (assuming it's a expression tree instead of a DAG) and
3883 // sort the leaves such that you need a minimal number of variables
3884 vector<int> vars_needed(n);
3885 vector<bool> vis(n,false);
3886 vector<vector<int> > conn(n);
3887
3888 stack<int> s;
3889 s.push(n);
3890
3891 while (!s.empty()) {
3892 int i=s.top(); s.pop();
3893 if (i>0) {
3894 i--;
3895 if (vis[i]) continue;
3896 vis[i]=true;
3897 s.push(-(i+1));
3898
3899 // find all connections
3900 for (const WORD *t=instr[i]+3; *t!=0; t+=*t)
3901 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
3902 int k = *(t+3);
3903 conn[i].push_back(k);
3904 s.push(k+1);
3905 }
3906 }
3907 else {
3908 i=-i-1;
3909
3910 // sort the childs w.r.t. needed variables
3911 vector<pair<int,int> > need;
3912 for (int j=0; j<(int)conn[i].size(); j++)
3913 need.push_back(make_pair(vars_needed[conn[i][j]], conn[i][j]));
3914
3915 // keep the comma expression in proper order
3916 if (*(instr[i]+1) != OPER_COMMA)
3917 sort(need.rbegin(), need.rend());
3918
3919 vars_needed[i] = 1;
3920 for (int j=0; j<(int)need.size(); j++) {
3921 vars_needed[i] = max(vars_needed[i], need[j].first+j);
3922 conn[i][j] = need[j].second;
3923 }
3924 }
3925 }
3926
3927 // order the instructions in depth-first order and determine the first
3928 // and last occurrences of variables
3929 vector<int> order, first(n,0), last(n,0);
3930 vis = vector<bool>(n,false);
3931 s.push(n);
3932
3933 while (!s.empty()) {
3934
3935 int i=s.top(); s.pop();
3936
3937 if (i>0) {
3938 i--;
3939 if (vis[i]) continue;
3940 vis[i]=true;
3941 s.push(-(i+1));
3942 for (int j=(int)conn[i].size()-1; j>=0; j--)
3943 s.push(conn[i][j]+1);
3944 }
3945 else {
3946 i=-i-1;
3947 first[i] = last[i] = order.size();
3948 order.push_back(i);
3949 for (int j=0; j<(int)conn[i].size(); j++) {
3950 int k = conn[i][j];
3951 last[k] = max(last[k], first[i]);
3952 }
3953 }
3954 }
3955
3956 // find the renumbering to recycled variables, where at any time the
3957 // lowest-indexed variable that can be used is chosen
3958 int numvar=0;
3959 set<int> var;
3960 vector<int> renum(n);
3961
3962 for (int i=0; i<(int)order.size(); i++) {
3963 for (int j=0; j<(int)conn[order[i]].size(); j++) {
3964 int k = conn[order[i]][j];
3965 if (last[k] == i) var.insert(renum[k]);
3966 }
3967
3968 if (var.empty()) var.insert(numvar++);
3969 renum[order[i]] = *var.begin(); var.erase(var.begin());
3970 }
3971
3972 // put the number of variables used in a preprocessor variable
3973
3974 // generate new instructions with the renumbering
3975 vector<WORD> newinstr;
3976
3977 for (int i=0; i<(int)order.size(); i++) {
3978 int x = order[i];
3979 int j = newinstr.size();
3980 newinstr.insert(newinstr.end(), instr[x], instr[x]+*(instr[x]+2));
3981
3982 newinstr[j] = renum[newinstr[j]];
3983
3984 for (WORD *t=&newinstr[j+3]; *t!=0; t+=*t)
3985 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL)
3986 *(t+3) = renum[*(t+3)];
3987 }
3988
3989#ifdef DEBUG_MORE
3990 MesPrint ("*** [%s, w=%w] DONE: recycle_variables", thetime_str().c_str());
3991#endif
3992
3993 return newinstr;
3994}
3995
3996/*
3997 #] recycle_variables :
3998 #[ optimize_expression_given_Horner :
3999*/
4000
4015
4016#ifdef DEBUG
4017 MesPrint ("*** [%s, w=%w] CALL: optimize_expression_given_Horner", thetime_str().c_str());
4018#endif
4019
4020 GETIDENTITY;
4021
4022 // initialize timer
4023 LONG start_time = TimeWallClock(1);
4024 LONG time_limit = 100 * AO.Optimize.greedytimelimit / (AO.Optimize.horner == O_MCTS ? AO.Optimize.mctsnumkeep : 1);
4025 if (time_limit == 0) time_limit=MAXPOSITIVE;
4026
4027 // pick a Horner scheme from the list
4028 LOCK(optimize_lock);
4029 vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4030 optimize_best_Horner_schemes.pop_back();
4031 UNLOCK(optimize_lock);
4032
4033// if ( ( AO.Optimize.debugflags&2 ) == 2 ) {
4034// MesPrint ("Scheme: %a",Horner_scheme.size(),&(Horner_scheme[0]));
4035// }
4036
4037 // apply Horner scheme
4038 vector<WORD> tree = Horner_tree(optimize_expr, Horner_scheme);
4039
4040 // generate instructions, eventually with CSE
4041 vector<WORD> instr;
4042
4043 if (AO.Optimize.method == O_CSE || AO.Optimize.method == O_CSEGREEDY)
4044 instr = generate_instructions(tree, true);
4045 else
4046 instr = generate_instructions(tree, false);
4048 if (AO.Optimize.method == O_CSEGREEDY || AO.Optimize.method == O_GREEDY) {
4049 instr = merge_operators(instr, false);
4050 instr = optimize_greedy(instr, time_limit-(TimeWallClock(1)-start_time));
4051 instr = merge_operators(instr, true);
4052 instr = optimize_greedy(instr, time_limit-(TimeWallClock(1)-start_time));
4053 }
4054 instr = merge_operators(instr, true);
4055
4056 // recycle the temporary variables
4057 instr = recycle_variables(instr);
4058
4059 // determine the quality of the code and possibly update the best code
4060 int num_oper = count_operators(instr);
4061
4062 LOCK(optimize_lock);
4063 if (num_oper < optimize_best_num_oper) {
4064 optimize_num_vars = Horner_scheme.size();
4065 optimize_best_num_oper = num_oper;
4066 optimize_best_instr = instr;
4067 optimize_best_vars = vector<WORD>(AN.poly_vars, AN.poly_vars+AN.poly_num_vars);
4068 }
4069 UNLOCK(optimize_lock);
4070
4071 // clean poly_vars, that are allocated by Horner_tree
4072 AN.poly_num_vars = 0;
4073 M_free(AN.poly_vars,"poly_vars");
4074
4075#ifdef DEBUG
4076 MesPrint ("*** [%s, w=%w] DONE: optimize_expression_given_Horner", thetime_str().c_str());
4077#endif
4078}
4079
4080/*
4081 #] optimize_expression_given_Horner :
4082 #[ PF_optimize_expression_given_Horner :
4083*/
4084#ifdef WITHMPI
4085
4086// Initialization.
4087void PF_optimize_expression_given_Horner_master_init () {
4088 // Nothing to do for now.
4089}
4090
4091// Wait for an idle slave and return the process number.
4092int PF_optimize_expression_given_Horner_master_next() {
4093
4094 // Find an idle slave.
4095 int next;
4096 PF_LongSingleReceive(PF_ANY_SOURCE, PF_OPT_HORNER_MSGTAG, &next, NULL);
4097 return next;
4098
4099}
4100
4101// The main function on the master.
4102void PF_optimize_expression_given_Horner_master () {
4103
4104#ifdef DEBUG
4105 MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4106#endif
4107
4108 // pick a Horner scheme from the list
4109 vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4110 optimize_best_Horner_schemes.pop_back();
4111
4112 // Find an idle slave.
4113 int next = PF_optimize_expression_given_Horner_master_next();
4114
4115 // Send a new task to the slave.
4117 int len = Horner_scheme.size();
4118 PF_LongSinglePack(&len, 1, PF_INT);
4119 PF_LongSinglePack(&Horner_scheme[0], len, PF_WORD);
4120 PF_LongSingleSend(next, PF_OPT_HORNER_MSGTAG);
4121
4122#ifdef DEBUG
4123 MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4124#endif
4125
4126}
4127
4128// Wait for all the slaves to finish their tasks.
4129void PF_optimize_expression_given_Horner_master_wait () {
4130
4131#ifdef DEBUG
4132 MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4133#endif
4134
4135 // Wait for all the slaves.
4136 for (int i = 1; i < PF.numtasks; i++) {
4137 int next = PF_optimize_expression_given_Horner_master_next();
4138 // Send a null task.
4140 int len = 0;
4141 PF_LongSinglePack(&len, 1, PF_INT);
4142 PF_LongSingleSend(next, PF_OPT_HORNER_MSGTAG);
4143 }
4144
4145 // Combine the result from all the slaves.
4146 optimize_best_num_oper = INT_MAX;
4147 for (int i = 1; i < PF.numtasks; i++) {
4148 PF_LongSingleReceive(PF_ANY_SOURCE, PF_OPT_COLLECT_MSGTAG, NULL, NULL);
4149
4150 int len;
4151
4152 // The first integer is the number of operations.
4153 PF_LongSingleUnpack(&len, 1, PF_INT);
4154
4155 if (len >= optimize_best_num_oper) continue;
4156
4157 // Update the best result.
4158 optimize_best_num_oper = len;
4159 PF_LongSingleUnpack(&len, 1, PF_INT);
4160 optimize_best_instr.resize(len);
4161 PF_LongSingleUnpack(&optimize_best_instr[0], len, PF_WORD);
4162 PF_LongSingleUnpack(&len, 1, PF_INT);
4163 optimize_best_vars.resize(len);
4164 PF_LongSingleUnpack(&optimize_best_vars[0], len, PF_WORD);
4165
4166 optimize_num_vars = optimize_best_vars.size(); // TODO
4167 }
4168
4169#ifdef DEBUG
4170 MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4171#endif
4172
4173}
4174
4175// The main function on the slaves.
4176void PF_optimize_expression_given_Horner_slave () {
4177
4178#ifdef DEBUG
4179 MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4180#endif
4181
4182 optimize_best_Horner_schemes.clear();
4183 optimize_best_num_oper = INT_MAX;
4184
4185 int dummy = 0;
4186 int len;
4187
4188 for (;;) {
4189 // Ask the master for the next task.
4191 PF_LongSinglePack(&dummy, 1, PF_INT);
4192 PF_LongSingleSend(MASTER, PF_OPT_HORNER_MSGTAG);
4193
4194 // Get a task from the master.
4195 PF_LongSingleReceive(MASTER, PF_OPT_HORNER_MSGTAG, NULL, NULL);
4196
4197 // Length of the task.
4198 PF_LongSingleUnpack(&len, 1, PF_INT);
4199
4200 // No task remains.
4201 if (len == 0) break;
4202
4203 // Perform the given task.
4204 optimize_best_Horner_schemes.push_back(vector<WORD>());
4205 vector<WORD> &Horner_scheme = optimize_best_Horner_schemes.back();
4206 Horner_scheme.resize(len);
4207 PF_LongSingleUnpack(&Horner_scheme[0], len, PF_WORD);
4209 }
4210
4211 // Send the result to the master.
4213 PF_LongSinglePack(&optimize_best_num_oper, 1, PF_INT);
4214 if (optimize_best_num_oper != INT_MAX) {
4215 len = optimize_best_instr.size();
4216 PF_LongSinglePack(&len, 1, PF_INT);
4217 PF_LongSinglePack(&optimize_best_instr[0], len, PF_WORD);
4218 len = optimize_best_vars.size();
4219 PF_LongSinglePack(&len, 1, PF_INT);
4220 PF_LongSinglePack(&optimize_best_vars[0], len, PF_WORD);
4221 }
4222 PF_LongSingleSend(MASTER, PF_OPT_COLLECT_MSGTAG);
4223
4224#ifdef DEBUG
4225 MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4226#endif
4227
4228}
4229
4230#endif
4231/*
4232 #] PF_optimize_expression_given_Horner :
4233 #[ generate_output :
4234*/
4235
4245VOID generate_output (const vector<WORD> &instr, int exprnr, int extraoffset, const vector<vector<WORD> > &brackets) {
4246
4247#ifdef DEBUG
4248 MesPrint ("*** [%s, w=%w] CALL: generate_output", thetime_str().c_str());
4249#endif
4250
4251 GETIDENTITY;
4252 vector<WORD> output;
4253
4254 // one-indexed instead of zero-indexed
4255 extraoffset++;
4256 int num = 0;
4257 int maxsize = (int)instr.size();
4258 for (int i=0; i<maxsize; i+=instr[i+2]) num++;
4259 int *tstart = (int *)Malloc1(num*sizeof(int),"nplaces");
4260 num = 0;
4261 for (int i=0; i<maxsize; i+=instr[i+2]) tstart[num++] = i;
4262 for (int j=0; j<num; j++) {
4263 int i = tstart[j];
4264
4265 // copy arguments
4266 WCOPY(AT.WorkPointer, &instr[i+3], (instr[i+2]-3));
4267
4268 // update maximal variable number
4269 AO.OptimizeResult.maxvar = MaX(AO.OptimizeResult.maxvar, instr[i]+extraoffset);
4270
4271 // renumber symbols and extrasymbols to correct values
4272 for (WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4273 if (*t == ABS(*(t+*t-1))+1) continue;
4274 if (*(t+1)==SYMBOL) *(t+3) = optimize_best_vars[*(t+3)];
4275 if (*(t+1)==EXTRASYMBOL) {
4276 *(t+1) = SYMBOL;
4277 *(t+3) = MAXVARIABLES-*(t+3)-extraoffset;
4278 }
4279 }
4280
4281 // reformat multiplication instructions, since their current
4282 // format is "expr.nr OPER_MUL length arguments", which differs
4283 // from Form's format for a product of symbols
4284 if (instr[i+1]==OPER_MUL) {
4285
4286 WORD *now=AT.WorkPointer+1;
4287 int dt;
4288 bool coeff=false;
4289 int coeff_sign=1;
4290
4291 for (WORD *t=AT.WorkPointer; *t!=0; t+=dt) {
4292 dt = *t;
4293 if (*t == ABS(*(t+*t-1))+1) {
4294 // copy coefficient
4295 memmove(AT.WorkPointer+instr[i+2], t, *t*sizeof(WORD));
4296 coeff = true;
4297 }
4298 else {
4299 // move symbol
4300 int n = *(t+2);
4301 memmove(now, t+1, n*sizeof(WORD));
4302 now += n;
4303 coeff_sign *= SGN(*(t+dt-1));
4304 }
4305 }
4306
4307 if (coeff) {
4308 // add existing coefficient
4309 int n = *(AT.WorkPointer + instr[i+2]) - 1;
4310 memmove(now, AT.WorkPointer+instr[i+2]+1, n*sizeof(WORD));
4311 now += n;
4312 }
4313 else {
4314 // add coefficient of one
4315 *now++=1;
4316 *now++=1;
4317 *now++=3;
4318 }
4319
4320 *(now-1) *= coeff_sign;
4321
4322 *AT.WorkPointer = now - AT.WorkPointer;
4323 *now++ = 0;
4324 }
4325
4326 // in the case of simultaneous optimization of expressions, add the
4327 // brackets to the final expression
4328 if (instr[i+1]==OPER_COMMA) {
4329 WORD *start = AT.WorkPointer + instr[i+2];
4330 WORD *now = start;
4331 int b=0;
4332 for (const WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4333 if ( ( brackets[b].size() != 0 ) && ( brackets[b][0] == 0 ) ) break;
4334 *now++ = *t + brackets[b].size();
4335 if (brackets[b].size() != 0) {
4336 memcpy(now, &brackets[b][0], brackets[b].size()*sizeof(WORD));
4337 now += brackets[b].size();
4338 }
4339 memcpy(now, t+1, (*t-1)*sizeof(WORD));
4340 now += *t-1;
4341 b++;
4342 }
4343 *now++ = 0;
4344 memmove(AT.WorkPointer, start, (now-start)*sizeof(WORD));
4345 }
4346
4347 // add the number of the extra symbol; if it is the last one,
4348 // replace the extra symbol number with the expression number
4349 if (i+instr[i+2]<(int)instr.size())
4350 output.push_back(instr[i]+extraoffset);
4351 else {
4352 output.push_back(-(exprnr+1));
4353 }
4354
4355 // add code for this symbol
4356 int n=0;
4357 while (*(AT.WorkPointer+n)!=0)
4358 n += *(AT.WorkPointer+n);
4359 n++;
4360 output.insert(output.end(), AT.WorkPointer, AT.WorkPointer+n);
4361 }
4362
4363 // trailing zero
4364 output.push_back(0);
4365
4366 // clear buffer
4367 if (AO.OptimizeResult.code != NULL)
4368 M_free(AO.OptimizeResult.code, "optimize output");
4369
4370 M_free(tstart,"nplaces");
4371
4372 // copy buffer
4373 AO.OptimizeResult.codesize = output.size();
4374 AO.OptimizeResult.code = (WORD *)Malloc1(output.size()*sizeof(WORD), "optimize output");
4375 memcpy(AO.OptimizeResult.code, &output[0], output.size()*sizeof(WORD));
4376
4377#ifdef DEBUG
4378 MesPrint ("*** [%s, w=%w] DONE: generate_output", thetime_str().c_str());
4379#endif
4380}
4381
4382/*
4383 #] generate_output :
4384 #[ generate_expression :
4385*/
4386
4394WORD generate_expression (WORD exprnr) {
4395
4396#ifdef DEBUG
4397 MesPrint ("*** [%s, w=%w] CALL: generate_expression", thetime_str().c_str());
4398#endif
4399
4400 GETIDENTITY;
4401
4402 WORD *oldWorkPointer = AT.WorkPointer;
4403
4404 CBUF *C = cbuf+AC.cbufnum;
4405 WORD *term = AT.WorkPointer, oldcurexpr = AR.CurExpr;
4406 POSITION position;
4407 EXPRESSIONS e = Expressions+exprnr;
4408 SetScratch(AR.infile,&(e->onfile));
4409
4410 if ( GetTerm(BHEAD term) <= 0 ) {
4411 MesPrint("Expression %d has problems in scratchfile",exprnr);
4412 Terminate(-1);
4413 }
4414 SeekScratch(AR.outfile,&position);
4415 e->onfile = position;
4416 if ( PutOut(BHEAD term,&position,AR.outfile,0) < 0 ) {
4417 MesPrint("Expression %d has problems in output scratchfile",exprnr);
4418 Terminate(-1);
4419 }
4420
4421 AR.CurExpr = exprnr;
4422 NewSort(BHEAD0);
4423
4424 // scan for the original expression (marked by *t<0) and give the
4425 // terms to Generator
4426 WORD *t = AO.OptimizeResult.code;
4427 {
4428 WORD old = AR.Cnumlhs; AR.Cnumlhs = 0;
4429 while (*t!=0) {
4430 bool is_expr = *t < 0;
4431 t++;
4432 while (*t!=0) {
4433 if (is_expr) {
4434 memcpy(AT.WorkPointer, t, *t*sizeof(WORD));
4435 Generator(BHEAD AT.WorkPointer, C->numlhs);
4436 }
4437 t+=*t;
4438 }
4439 t++;
4440 }
4441 AR.Cnumlhs = old;
4442 }
4443
4444 // final sorting
4445 if (EndSort(BHEAD NULL,0) < 0) {
4447 Terminate(-1);
4448 }
4449
4450 AT.WorkPointer = oldWorkPointer;
4451 AR.CurExpr = oldcurexpr;
4452
4453#ifdef DEBUG
4454 MesPrint ("*** [%s, w=%w] DONE: generate_expression", thetime_str().c_str());
4455#endif
4456
4457 return 0;
4458}
4459
4460/*
4461 #] generate_expression :
4462 #[ optimize_print_code :
4463*/
4464
4474VOID optimize_print_code (int print_expr) {
4475
4476#ifdef DEBUG
4477 MesPrint ("*** [%s, w=%w] CALL: optimize_print_code", thetime_str().c_str());
4478#endif
4479 if ( ( AO.Optimize.debugflags & 1 ) != 0 ) {
4480/*
4481 * The next code is for debugging purposes. We may want the statements
4482 * in reverse order to substitute them all back.
4483 * Jan used a Mathematica program to do this. Here we make that
4484 * Format Ox,debugflag=1;
4485 * Creates reverse order during printing.
4486 * All we have to do is put id in front of the statements. This is done
4487 * in PrintExtraSymbol.
4488 */
4489 WORD *t = AO.OptimizeResult.code;
4490 WORD num = 0; // First we count the number of objects.
4491 while (*t!=0) {
4492 num++;
4493 t++; while (*t!=0) t+=*t; t++;
4494 }
4495 WORD **tstart = (WORD **)Malloc1(num*sizeof(WORD *),"print objects");
4496 t = AO.OptimizeResult.code; num = 0; // Now we get the addresses
4497 while (*t!=0) {
4498 tstart[num++] = t;
4499 t++; while (*t!=0) t+=*t; t++;
4500 }
4501 // Flip the addresses
4502 int halfnum = num/2;
4503 for (int i=0; i<halfnum; i++) { swap(tstart[i],tstart[num-1-i]); }
4504 for ( int i = 0; i < num; i++ ) {
4505 t = tstart[i];
4506 if (*t > 0)
4507 PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4508 else if (print_expr)
4509 PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4510 }
4511 CBUF *C = cbuf + AM.sbufnum;
4512 if (C->numrhs >= AO.OptimizeResult.minvar)
4513 PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4514 }
4515 else {
4516 // print extra symbols from ConvertToPoly in optimization
4517 CBUF *C = cbuf + AM.sbufnum;
4518 if (C->numrhs >= AO.OptimizeResult.minvar)
4519 PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4520
4521 WORD *t = AO.OptimizeResult.code;
4522 while (*t!=0) {
4523 if (*t > 0) {
4524 PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4525 }
4526 else if (print_expr)
4527 PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4528 t++;
4529 while (*t!=0) t+=*t;
4530 t++;
4531 }
4532 }
4533
4534#ifdef DEBUG
4535 MesPrint ("*** [%s, w=%w] DONE: optimize_print_code", thetime_str().c_str());
4536#endif
4537}
4538
4539/*
4540 #] optimize_print_code :
4541 #[ Optimize :
4542*/
4543
4587int Optimize (WORD exprnr, int do_print) {
4588
4589#if defined(WITHMPI) && (defined(DEBUG) || defined(DEBUG_MORE) || defined(DEBUG_MCTS) || defined(DEBUG_GREEDY))
4590 // set AS.printflag negative temporary.
4591 struct save_printflag {
4592 save_printflag() {
4593 oldprintflag = AS.printflag;
4594 AS.printflag = -1;
4595 }
4596 ~save_printflag() {
4597 AS.printflag = oldprintflag;
4598 }
4599 int oldprintflag;
4600 } save_printflag_;
4601#endif
4602
4603#ifdef DEBUG
4604 MesPrint ("*** [%s, w=%w] CALL: Optimize", thetime_str().c_str());
4605 MesPrint ("*** %"); PrintRunningTime();
4606#endif
4607
4608#ifdef WITHPTHREADS
4609 optimize_lock = dummylock;
4610#endif
4611
4612 AO.OptimizeResult.minvar = (cbuf + AM.sbufnum)->numrhs + 1;
4613
4614 if (get_expression(exprnr) < 0) return -1;
4615 vector<vector<WORD> > brackets = get_brackets();
4616
4617#ifdef DEBUG
4618#ifdef WITHMPI
4619 if (PF.me == MASTER)
4620#endif
4621 MesPrint ("*** runtime after preparing the expression: %"); PrintRunningTime();
4622#endif
4623
4624 if (optimize_expr[0]==0 ||
4625 (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==ABS(optimize_expr[optimize_expr[0]-1])+1) ||
4626 (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==8 &&
4627 optimize_expr[5]==1 && optimize_expr[6]==1 && ABS(optimize_expr[7])==3)) {
4628 if (AO.OptimizeResult.code != NULL)
4629 M_free(AO.OptimizeResult.code, "optimize output");
4630
4631 // zero terms or one trivial term (number or +/-variable), so no
4632 // optimization, so copy expression; special case because without
4633 // operators the optimization crashes
4634 AO.OptimizeResult.code = (WORD *)Malloc1((optimize_expr[0]+3)*sizeof(WORD), "optimize output");
4635 AO.OptimizeResult.code[0] = -(exprnr+1);
4636 memcpy(AO.OptimizeResult.code+1, optimize_expr, (optimize_expr[0]+1)*sizeof(WORD));
4637 AO.OptimizeResult.code[optimize_expr[0]+2] = 0;
4638 }
4639 else {
4640 // find Horner scheme(s)
4641 optimize_best_Horner_schemes.clear();
4642 if (AO.Optimize.horner == O_OCCURRENCE) {
4643 if (AO.Optimize.hornerdirection != O_BACKWARD)
4644 optimize_best_Horner_schemes.push_back(occurrence_order(optimize_expr, false));
4645 if (AO.Optimize.hornerdirection != O_FORWARD)
4646 optimize_best_Horner_schemes.push_back(occurrence_order(optimize_expr, true));
4647 }
4648 else {
4649 if (AO.Optimize.horner == O_SIMULATED_ANNEALING) {
4650 optimize_best_Horner_schemes.push_back(simulated_annealing());
4651 }
4652 else {
4653 mcts_best_schemes.clear();
4654 if ( AO.inscheme ) {
4655 optimize_best_Horner_schemes.push_back(vector<WORD>(AO.schemenum));
4656 for ( int i=0; i < AO.schemenum; i++ )
4657 optimize_best_Horner_schemes[0][i] = AO.inscheme[i];
4658 }
4659 else {
4660 for ( int i = 0; i < AO.Optimize.mctsnumrepeat; i++ )
4662 // generate results
4663 for (set<pair<int,vector<WORD> > >::iterator i=mcts_best_schemes.begin(); i!=mcts_best_schemes.end(); i++) {
4664 optimize_best_Horner_schemes.push_back(i->second);
4665#ifdef DEBUG_MCTS
4666 MesPrint ("{%a} -> %d",i->second.size(), &i->second[0], i->first);
4667#endif
4668 }
4669 }
4670 // clear the tree by making a new empty one.
4671 mcts_root = tree_node();
4672 }
4673 }
4674#ifdef DEBUG
4675#ifdef WITHMPI
4676 if (PF.me == MASTER)
4677#endif
4678 MesPrint ("*** runtime after Horner: %"); PrintRunningTime();
4679#endif
4680
4681#ifdef WITHMPI
4682 if (PF.me == MASTER ) {
4683 PF_optimize_expression_given_Horner_master_init();
4684#endif
4685
4686 // find best Horner scheme and results
4687 optimize_best_num_oper = INT_MAX;
4688
4689 int imax = (int)optimize_best_Horner_schemes.size();
4690
4691 for (int i=0; i<imax; i++) {
4692#if defined(WITHPTHREADS)
4693 if (AM.totalnumberofthreads > 1)
4694 optimize_expression_given_Horner_threaded();
4695 else
4696#elif defined(WITHMPI)
4697 if (PF.numtasks > 1)
4698 PF_optimize_expression_given_Horner_master();
4699 else
4700#endif
4702 }
4703
4704#ifdef WITHMPI
4705 PF_optimize_expression_given_Horner_master_wait();
4706 }
4707 else {
4708 if (PF.numtasks > 1)
4709 PF_optimize_expression_given_Horner_slave();
4710 }
4711#endif
4712
4713#ifdef WITHPTHREADS
4714 MasterWaitAll();
4715#endif
4716 // format results, then print it (for "Print") or modify
4717 // expression (for "#Optimize")
4718#ifdef WITHMPI
4719 if (PF.me == MASTER)
4720#endif
4721 generate_output(optimize_best_instr, exprnr, cbuf[AM.sbufnum].numrhs, brackets);
4722#ifdef WITHMPI
4723 else {
4724 // non-null dummy code for slaves
4725 if (AO.OptimizeResult.code == NULL) {
4726 AO.OptimizeResult.code = (WORD *)Malloc1(sizeof(WORD), "optimize output");
4727 }
4728 }
4729#endif
4730 }
4731
4732#ifdef WITHMPI
4733 if (PF.me == MASTER) {
4735 PF_Pack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4736 PF_Pack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4737 }
4738 PF_Broadcast();
4739 if (PF.me != MASTER) {
4740 PF_Unpack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4741 PF_Unpack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4742 }
4743#endif
4744
4745 // set preprocessor variables
4746 char str[100];
4747 snprintf (str,100,"%d",AO.OptimizeResult.minvar);
4748 PutPreVar((UBYTE *)"optimminvar_",(UBYTE *)str,0,1);
4749 snprintf (str,100,"%d",AO.OptimizeResult.maxvar);
4750 PutPreVar((UBYTE *)"optimmaxvar_",(UBYTE *)str,0,1);
4751
4752 if (do_print) {
4753#ifdef WITHMPI
4754 if (PF.me == MASTER)
4755#endif
4757 ClearOptimize();
4758 }
4759 else {
4760#ifdef WITHMPI
4761 if (PF.me == MASTER)
4762#endif
4763 generate_expression(exprnr);
4764 }
4765
4766#ifdef WITHMPI
4767 if (PF.me == MASTER) {
4768#endif
4769
4770 if ( AO.Optimize.printstats > 0 ) {
4771 char str[20];
4772 MesPrint("");
4773 count_operators(optimize_expr,true);
4774 int numop = count_operators(optimize_best_instr,true);
4775 snprintf(str,20,"%d",numop);
4776 PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)str,0,1);
4777 }
4778 else {
4779 char str[20];
4780 int numop = count_operators(optimize_best_instr,false);
4781 snprintf(str,20,"%d",numop);
4782 PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)str,0,1);
4783 }
4784
4785 if ( ( AO.Optimize.schemeflags&1 ) == 1 ) {
4786 GETIDENTITY
4787 UBYTE *OutScr, *Out, *old1 = AO.OutputLine, *old2 = AO.OutFill;
4788 int i, sym;
4789 AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4790 FiniLine();
4791 OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4792 TokenToLine((UBYTE *)" Scheme selected: ");
4793 for ( i = 0; i < optimize_num_vars; i++ ) {
4794 Out = OutScr;
4795 sym = optimize_best_vars[i];
4796 if ( i > 0 ) TokenToLine((UBYTE *)",");
4797 if ( sym < NumSymbols ) {
4798 StrCopy(FindSymbol(sym),OutScr);
4799/* StrCopy(VARNAME(symbols,sym),OutScr); */
4800 }
4801 else {
4802 Out = StrCopy((UBYTE *)AC.extrasym,Out);
4803 if ( AC.extrasymbols == 0 ) {
4804 Out = NumCopy((MAXVARIABLES-sym),Out);
4805 Out = StrCopy((UBYTE *)"_",Out);
4806 }
4807 else if ( AC.extrasymbols == 1 ) {
4808 Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4809 }
4810 }
4811 TokenToLine(OutScr);
4812 }
4813 TokenToLine((UBYTE *)";");
4814 FiniLine();
4815 AO.OutFill = old2;
4816 AO.OutputLine = old1;
4817 }
4818
4819 {
4820 GETIDENTITY
4821 UBYTE *OutScr, *Out, *outstring = 0;
4822 int i, sym;
4823 AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4824 OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4825 for ( i = 0; i < optimize_num_vars; i++ ) {
4826 Out = OutScr;
4827 sym = optimize_best_vars[i];
4828 if ( sym < NumSymbols ) {
4829 StrCopy(FindSymbol(sym),OutScr);
4830/* StrCopy(VARNAME(symbols,sym),OutScr); */
4831 }
4832 else {
4833 Out = StrCopy((UBYTE *)AC.extrasym,Out);
4834 if ( AC.extrasymbols == 0 ) {
4835 Out = NumCopy((MAXVARIABLES-sym),Out);
4836 Out = StrCopy((UBYTE *)"_",Out);
4837 }
4838 else if ( AC.extrasymbols == 1 ) {
4839 Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4840 }
4841 }
4842 outstring = AddToString(outstring,OutScr,1);
4843 }
4844 if ( outstring == 0 ) {
4845 PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)"",0,1);
4846 }
4847 else {
4848 PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)outstring,0,1);
4849 M_free(outstring,"AddToString");
4850 }
4851 }
4852
4853#ifdef WITHMPI
4854 }
4855
4856 // synchronize optimvalue_ and optimscheme_
4857 if ( PF.me == MASTER ) {
4858 UBYTE *value;
4859 int bytes;
4860
4862
4863 value = GetPreVar((UBYTE *)"optimvalue_", WITHERROR);
4864 bytes = strlen((char *)value);
4865 PF_LongMultiPack(&bytes, 1, PF_INT);
4866 PF_LongMultiPack(value, bytes, PF_BYTE);
4867
4868 value = GetPreVar((UBYTE *)"optimscheme_", WITHERROR);
4869 bytes = strlen((char *)value);
4870 PF_LongMultiPack(&bytes, 1, PF_INT);
4871 PF_LongMultiPack(value, bytes, PF_BYTE);
4872 }
4874 if ( PF.me != MASTER ) {
4875 static vector<UBYTE> prevarbuf;
4876 UBYTE *value;
4877 int bytes;
4878
4879 PF_LongMultiUnpack(&bytes, 1, PF_INT);
4880 prevarbuf.reserve(bytes + 1);
4881 value = &prevarbuf[0];
4882 PF_LongMultiUnpack(value, bytes, PF_BYTE);
4883 value[bytes] = '\0'; // null terminator
4884 PutPreVar((UBYTE *)"optimvalue_", value, NULL, 1);
4885
4886 PF_LongMultiUnpack(&bytes, 1, PF_INT);
4887 prevarbuf.reserve(bytes + 1);
4888 value = &prevarbuf[0];
4889 PF_LongMultiUnpack(value, bytes, PF_BYTE);
4890 value[bytes] = '\0'; // null terminator
4891 PutPreVar((UBYTE *)"optimscheme_", value, NULL, 1);
4892 }
4893#endif
4894
4895 // cleanup
4896 M_free(optimize_expr,"LoadOptim");
4897
4898#ifdef DEBUG
4899 MesPrint ("*** [%s, w=%w] DONE: Optimize", thetime_str().c_str());
4900#endif
4901
4902 return 0;
4903}
4904
4905/*
4906 #] Optimize :
4907 #[ ClearOptimize :
4908*/
4909
4925{
4926 char str[20];
4927 WORD numexpr, *w;
4928 int error = 0;
4929 if ( AO.OptimizeResult.code != NULL ) {
4930 M_free(AO.OptimizeResult.code, "optimize output");
4931 AO.OptimizeResult.code = NULL;
4932 AO.OptimizeResult.codesize = 0;
4933 PutPreVar((UBYTE *)"optimminvar_",(UBYTE *)("0"),0,1);
4934 PutPreVar((UBYTE *)"optimmaxvar_",(UBYTE *)("0"),0,1);
4935 PruneExtraSymbols(AO.OptimizeResult.minvar-1);
4936 cbuf[AM.sbufnum].numrhs = AO.OptimizeResult.minvar-1;
4937 AO.OptimizeResult.minvar = AO.OptimizeResult.maxvar = 0;
4938 }
4939 if ( AO.OptimizeResult.nameofexpr != NULL ) {
4940/*
4941 We have to pick up the expression by its name. Numbers may change.
4942 Note that this requires that when we overwrite an expression, we
4943 check that it is not an optimized expression. See execute.c and
4944 comexpr.c
4945*/
4946 if ( GetName(AC.exprnames,AO.OptimizeResult.nameofexpr,&numexpr,NOAUTO) != CEXPRESSION ) {
4947 MesPrint("@Internal error while clearing optimized expression %s ",AO.OptimizeResult.nameofexpr);
4948 Terminate(-1);
4949 }
4950 M_free(AO.OptimizeResult.nameofexpr, "optimize expression name");
4951 AO.OptimizeResult.nameofexpr = NULL;
4952 w = &(Expressions[numexpr].status);
4953 *w = SetExprCases(DROP,1,*w);
4954 if ( *w < 0 ) error = 1;
4955 }
4956 snprintf (str,20,"%d",cbuf[AM.sbufnum].numrhs);
4957 PutPreVar(AM.oldnumextrasymbols,(UBYTE *)str,0,1);
4958 PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)("0"),0,1);
4959 PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)("0"),0,1);
4960 return(error);
4961}
4962
4963/*
4964 #] ClearOptimize :
4965*/
WORD NewSort(PHEAD0)
Definition sort.c:592
WORD PutOut(PHEAD WORD *, POSITION *, FILEHANDLE *, WORD)
Definition sort.c:1405
LONG EndSort(PHEAD WORD *, int)
Definition sort.c:682
LONG TimeWallClock(WORD)
Definition tools.c:3476
WORD Generator(PHEAD WORD *, WORD)
Definition proces.c:3101
WORD StoreTerm(PHEAD WORD *)
Definition sort.c:4333
int PutPreVar(UBYTE *, UBYTE *, UBYTE *, int)
Definition pre.c:642
int PF_LongSingleReceive(int src, int tag, int *psrc, int *ptag)
Definition mpi.c:1583
int PF_LongSingleSend(int to, int tag)
Definition mpi.c:1540
int PF_PrepareLongSinglePack(void)
Definition mpi.c:1451
int PF_Unpack(void *buffer, size_t count, MPI_Datatype type)
Definition mpi.c:671
int PF_Receive(int src, int tag, int *psrc, int *ptag)
Definition mpi.c:848
int PF_Send(int to, int tag)
Definition mpi.c:822
int PF_PreparePack(void)
Definition mpi.c:624
int PF_LongSingleUnpack(void *buffer, size_t count, MPI_Datatype type)
Definition mpi.c:1503
int PF_Pack(const void *buffer, size_t count, MPI_Datatype type)
Definition mpi.c:642
int PF_PrepareLongMultiPack(void)
Definition mpi.c:1643
int PF_Broadcast(void)
Definition mpi.c:883
int PF_LongMultiBroadcast(void)
Definition mpi.c:1807
int PF_LongSinglePack(const void *buffer, size_t count, MPI_Datatype type)
Definition mpi.c:1469
vector< WORD > recycle_variables(const vector< WORD > &all_instr)
Definition optimize.cc:3867
int ClearOptimize()
Definition optimize.cc:4924
int count_operators(const WORD *expr, bool print=false)
Definition optimize.cc:401
vector< vector< WORD > > get_brackets()
Definition optimize.cc:281
vector< WORD > Horner_tree(const WORD *expr, const vector< WORD > &order)
Definition optimize.cc:852
vector< WORD > occurrence_order(const WORD *expr, bool rev)
Definition optimize.cc:498
vector< optimization > find_optimizations(const vector< WORD > &instr)
Definition optimize.cc:2651
void find_Horner_MCTS()
Definition optimize.cc:2208
vector< WORD > merge_operators(const vector< WORD > &all_instr, bool move_coeff)
Definition optimize.cc:2356
VOID optimize_print_code(int print_expr)
Definition optimize.cc:4474
void optimize_expression_given_Horner()
Definition optimize.cc:4014
vector< WORD > optimize_greedy(vector< WORD > instr, LONG time_limit)
Definition optimize.cc:3727
WORD generate_expression(WORD exprnr)
Definition optimize.cc:4394
int count_operators_cse(const vector< WORD > &tree)
Definition optimize.cc:1295
LONG get_expression(int exprnr)
Definition optimize.cc:204
bool do_optimization(const optimization optim, vector< WORD > &instr, int newid)
Definition optimize.cc:2884
int partial_factorize(vector< WORD > &instr, int n, int improve)
Definition optimize.cc:3433
VOID generate_output(const vector< WORD > &instr, int exprnr, int extraoffset, const vector< vector< WORD > > &brackets)
Definition optimize.cc:4245
void build_Horner_tree(const WORD **terms, int numterms, int var, int maxvar, int pos, vector< WORD > *res)
Definition optimize.cc:631
void fixarg(UWORD *t, WORD &n)
Definition optimize.cc:593
bool term_compare(const WORD *a, const WORD *b)
Definition optimize.cc:831
void my_random_shuffle(PHEAD RandomAccessIterator fr, RandomAccessIterator to)
Definition optimize.cc:180
int Optimize(WORD exprnr, int do_print)
Definition optimize.cc:4587
WORD getpower(const WORD *term, int var, int pos)
Definition optimize.cc:579
vector< WORD > generate_instructions(const vector< WORD > &tree, bool do_CSE)
Definition optimize.cc:1086
void PF_BroadcastBuffer(WORD **buffer, LONG *length)
Definition parallel.c:2110
VOID LowerSortLevel()
Definition sort.c:4727