6;; The following utility macros define basic multi-register
7;; operations
for the integer
square root algorithm from
8;; an
unsigned integer. They all accept `size` parameter
9;; which is the number of *bytes* in the *result*. Some
10;; of them also have `flag`, which is used when the first
11;; instruction is different from the rest of the instructions
12;; (e.g. does not use the C-flag).
14;; Set the initial value val to the
register number rnum.
15;; Also, use SUB instruction to set C=0, since
this is a
16;; precondition at the start.
17.macro initval _rnum _size _val
20 initval (\_rnum + 1), (\_size - 1), \_val
26;; Test next binary digit in developing
square root.
27;; If flag is set, the first instruction does not
29.macro testdigit _rarg _rres _size _flag
31 cp \_rarg + \_size, \_rres
33 cpc \_rarg + \_size, \_rres
36 testdigit (\_rarg + 2), (\_rres + 1), (\_size - 1), 0
40;; Update Y_m (see notations below),
if next binary
41;; digit is set. If flag is set, the first instruction
43.macro updatearg _rarg _rres _size _flag
45 sub \_rarg + \_size, \_rres
47 sbc \_rarg + \_size, \_rres
50 updatearg (\_rarg + 2), (\_rres + 1), (\_size - 1), 0
54;; Set binary digit in the result
if X_m < Y_m is
true.
55.macro setdigit _rres _mask _size
58 setdigit (\_rres + 1), (\_mask + 1), (\_size - 1)
62;; Shift the rotation mask right. If flag is set, the first
63;; instruction does not use C-flag.
64.macro shiftmask _mask _size _flag
66 lsr \_mask + \_size - 1
68 ror \_mask + \_size - 1
71 shiftmask \_mask, (\_size - 1), 0
75;; Set the next test binary digit in the result that will be
76;; tested in the next iteration of the main loop.
77.macro shiftdigit _rres _mask _size
78 eor \_rres + \_size - 1, \_mask + \_size - 1
80 shiftdigit \_rres, \_mask, (\_size - 1)
84;; Argument is rolled left to keep relevant bits in the higher
85;; (size / 2) bytes. During the last iteration,
this also rolls
86;; carry bit into bit 0 of the LSB of the `rarg` registers.
87.macro shiftarg _rarg _size
90 shiftarg (\_rarg + 1), (\_size - 1)
94;; Last bit
requires special treatment. The following macro saves
95;; the last bit into C-flag, which is added to the result at the
97.macro setlast _rarg _rres _size
98 cpc \_rres, \_rarg + \_size
100 setlast (\_rarg + 2), (\_rres + 1), (\_size - 1)
104; Generic macro that generates an integer
square root algorithm, where the
105; result has
'size' bytes. The argument provided in the registers starting
106; with
'rarg' number and has twice the number of bytes than does
109; Based on the sqrt32_floor algorithm:
112; The algorithm is built around the following inequality to test if
113; c_{m-1} binary digit in the result is 0 or 1:
114; (P_m + 2^{m-1})^2 < N^2,
115; where N^2 is the input, and P_m is the current developing
square root that
116; has m binary digits P_m = (c_n, c_{n-1}, ... c_m), where n is the number of
119; This inequality can be rewritten as
122; X_m = 2^m P_m + 2^{2m-2},
124; The recurrence relations to update X_m and Y_m are:
125; 2X_m = X_{m + 1} + 2^{2m} + 2^{2m-1}, (n-bits) stored in rres,
126; Y_m = Y_{m + 1} - X_{m+1} , (n-bits) stored in arg MSB,
127; and the last two terms in 2X_m are kept in the rotation mask.
129; For example,
for 8-bit
square root, starting values are:
130; m = 8, P_8 = 0, X_8 = 2^14 = 0x4000, and Y_8 = N^2.
133; rarg: Register number that contains LSB of the argument.
134; rres: Register number that contains LSB of the result.
135; mask: Register number that contains LSB of the mask.
137; This macro neither saves any call-saved registers, nor sets it any registers
138; in compliance with the AVR ABI. These should be done separately.
139.macro sqrtengine _rarg _rres _mask _size
140 ;; Set initial values and carry flag, C = 0.
141 initval \_mask, \_size, 0xc0 ; Rotation mask
142.if \_size <= 2 || \_rres % 2 != 0 || \_mask % 2 != 0
143 initval \_rres, \_size, 0x40 ; Expanding
square root
145 X_movw \_rres, \_mask ; Copy 0x0000 from _mask
146 initval \_rres+2, \_size-2, 0x40 ; Expanding
square root
151 ;; Precondition: C = 0.
153 brcs 1f ;
if C = 1, X_m < Y_m already
154 testdigit \_rarg, \_rres, \_size, 1
155 brcs 2f ;
if X_m < Y_m, update X_m and Y_m
157 updatearg \_rarg, \_rres, \_size, 1 ; Update Y_m <- Y_m - X_m
158 setdigit \_rres, \_mask, \_size ; Set next test digit in the result.
160 shiftmask \_mask, \_size, 1 ; Shift mask, C = 1 --> end of loop.
161 shiftdigit \_rres, \_mask, \_size
162 shiftarg \_rarg, (2 * \_size) ; Shift right, and save C into bit 0.
163 sbrs \_rarg, 0 ; Exit
if bit 0 is set from the mask
164 rjmp .Lnext_bit ; through the C-flag.
166 brcs 3f ; C = 1 means last bit is 1.
167 lsl \_rarg + \_size - 1 ; Restore last bit in arg from C
168 setlast \_rarg, \_rres, \_size ; after
this, C is the last bit.
170 adc \_rres, __zero_reg__ ; Add last bit from C to the result.