303 lines
10 KiB
C++
303 lines
10 KiB
C++
#include <stdint.h>
|
|
#if defined(ARDUINO_ARCH_AVR)
|
|
#include <avr/pgmspace.h>
|
|
#else
|
|
#define PROGMEM
|
|
#define pgm_read_byte_near(x) (*(x))
|
|
#endif
|
|
#include "PoorManFloat.h"
|
|
#ifdef TEST
|
|
#include <stdio.h>
|
|
#endif
|
|
|
|
// In FastAccelStepper there is seldom the need for adding or
|
|
// subtracting floats, but multiplication/division/square and power
|
|
// are often in use. Negative numbers and even zero are not needed.
|
|
// Consequently, a purely logarithmic representation is completely
|
|
// sufficient and the necessary range can be achieved by 16 bit signed integers.
|
|
// The interpretation of a signed integer xi is:
|
|
//
|
|
// x = 2^(xi/512)
|
|
//
|
|
// The signed integer range xi (-32768..32767) is mapped to the
|
|
// range (5e-20, 1.8e19). Please note: zero is not included.
|
|
// The greatest constant in use is 2.2e14. So the range is sufficient.
|
|
//
|
|
// In order to map x to xi, log2(x) needs to be calculated.
|
|
// For this we rewrite x to:
|
|
// x = (1 + r) * 2^e
|
|
//
|
|
// With r being in the range 0 <= r < 1.
|
|
// This means e is the largest integer value with 2^e < x.
|
|
// Consequently, e can be derived by counting the leading numbers in
|
|
// the integer x.
|
|
//
|
|
// So xi = 512*log2(x) = 512 * (e + log(1+r))
|
|
//
|
|
// The first 7 bits is plain e with sign and the lower 9 bits is log(1+r)*512
|
|
// xi = eeee_eeem_mmmm_mmmm
|
|
//
|
|
// Example for 16 bit integer:
|
|
// x = 15373 = 0b0011_1100_0000_1101
|
|
//
|
|
// log2(15373)*512 = 7121 (rounded)
|
|
// = 0b0001_1011_1101_0001
|
|
//
|
|
// two leading zeros => e = 15 - 2 = 13 = 0b1101
|
|
// 2^e = 8192
|
|
//
|
|
// => x = 8192 * 1.1_1100_0000_1101
|
|
// r is the decimal part without the leading 1
|
|
// r = 1_1100_0000 / 512
|
|
// log2(1+r) * 512 = 464 = 0b1_1101_0001
|
|
//
|
|
// xi = eeee_eeem_mmmm_mmmm
|
|
// = 0001_1011_1101_0001
|
|
// = 0x1bd1
|
|
//
|
|
// The remaining task is to calculate log2(1+f).
|
|
//
|
|
// log2(1+r) is at the corners identical to r:
|
|
// log2(1+0) = 0 and log2(1+1) = 1
|
|
// So it is interesting to look at function
|
|
// f(1+r) = log2(1+r) - r
|
|
//
|
|
// This function is 0 for r at 0 and 1, positive over the range inbetween
|
|
// and reaches max value of 0.08607 at r = 0.442695.
|
|
// This max value is at r = 1/ln(2)-1
|
|
//
|
|
// As we need actually 512*log2(1+r), then the max value is 44.
|
|
// This allows to multiply with up to 8 to improve the resolution.
|
|
// Here we use factor 4, so two table values can be summed up without overflow.
|
|
//
|
|
// So the log2 can be calculated for e.g.:
|
|
// x = 0001_mmmm_mmmm_mmmm three leading zeros
|
|
//
|
|
// Shift left by leading zeros+1 (removing leading 1) and then right by 5:
|
|
// x = 0001_mmmm_mmmm_mmmm three leading zeros
|
|
// mmmm_mmmm_mmmm_m000 shift left (3+1)
|
|
// 0000_0mmm_mmmm_mmmm shift right (5)
|
|
// if ninth bit is 0:
|
|
// + ttt_tttt0 add table value for mmmm_mmmm shifted one
|
|
// else:
|
|
// + 0ttt_tttt add table value for mmmm_mmmm
|
|
// + 0ttt_tttt add table value for mmmm_mmmm+1
|
|
// + 0000_0001 round
|
|
// result: mmmm_mmmm_m
|
|
// final: 000e_eeem_mmmm_mmmm For up to 32bits (e = 31)
|
|
//
|
|
// We are using a table of length 256, so first 8 bits are the index.
|
|
// In order to improve the resolution, the ninth bit is used to interpolate
|
|
// with the next table entry.
|
|
//
|
|
// Using python3 this can be calculated by:
|
|
// [round(math.log2(i/256) * 256 - (i-256)) for i in range(256,512)]
|
|
//
|
|
// For better precision y_yyyy is shifted by 2 and can be calculated as:
|
|
// [round((math.log2(i/256) * 256 - (i-256))*4) for i in range(256,512)]
|
|
//
|
|
const PROGMEM uint8_t log2_minus_x_plus_one_shifted_by_2[256] = {
|
|
0, 2, 3, 5, 7, 9, 10, 12, 13, 15, 17, 18, 20, 21, 23, 24, 26, 27, 28,
|
|
30, 31, 32, 34, 35, 36, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51,
|
|
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 68,
|
|
69, 70, 70, 71, 72, 72, 73, 74, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79, 80,
|
|
80, 80, 81, 81, 82, 82, 83, 83, 83, 84, 84, 84, 84, 85, 85, 85, 86, 86, 86,
|
|
86, 86, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88,
|
|
88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 87, 87, 87, 87, 87, 87, 86, 86,
|
|
86, 86, 86, 85, 85, 85, 85, 84, 84, 84, 84, 83, 83, 83, 82, 82, 82, 81, 81,
|
|
81, 80, 80, 79, 79, 79, 78, 78, 77, 77, 76, 76, 75, 75, 74, 74, 73, 73, 72,
|
|
72, 71, 71, 70, 70, 69, 68, 68, 67, 67, 66, 65, 65, 64, 63, 63, 62, 61, 61,
|
|
60, 59, 59, 58, 57, 57, 56, 55, 54, 54, 53, 52, 51, 51, 50, 49, 48, 47, 47,
|
|
46, 45, 44, 43, 42, 42, 41, 40, 39, 38, 37, 36, 35, 34, 34, 33, 32, 31, 30,
|
|
29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
|
|
10, 9, 8, 7, 6, 4, 3, 2, 1};
|
|
|
|
// For the inverse pow(2,x) needs to be calculated. Similarly it makes sense to
|
|
// evaluate instead
|
|
// g(x) = x - pow(2,x-1)
|
|
//
|
|
// This function equals 0 for x at 1 and 2, with extremum 0.08607 at x
|
|
// = 1.528766. This max. value is at x = 1 - ln(ln(2))/ln(2)
|
|
//
|
|
// Noteworthy the max values of log2(x) - x + 1 and x - pow(2, x-1) are
|
|
// identical, calculated by (-1 + ln(2) - ln(ln(2)))/ln(2)
|
|
//
|
|
// Using python3 this can be calculated by:
|
|
// [round(i - math.pow(2,i/256-1)*256) for i in range(256,512)]
|
|
//
|
|
// Similarly shifted by two bits:
|
|
// [round((i - math.pow(2,i/256-1)*256)*4) for i in range(256,512)]
|
|
//
|
|
const PROGMEM uint8_t x_minus_pow2_of_x_minus_one_shifted_by_2[256] = {
|
|
0, 1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21,
|
|
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
|
|
41, 42, 43, 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 52, 53, 54, 55, 56, 56,
|
|
57, 58, 59, 59, 60, 61, 62, 62, 63, 64, 64, 65, 66, 66, 67, 68, 68, 69, 69,
|
|
70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 76, 76, 76, 77, 77, 78, 78, 79, 79,
|
|
80, 80, 80, 81, 81, 82, 82, 82, 83, 83, 83, 84, 84, 84, 84, 85, 85, 85, 85,
|
|
86, 86, 86, 86, 87, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 88, 88, 88, 88,
|
|
88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 87, 87, 87, 87, 87,
|
|
87, 86, 86, 86, 86, 86, 85, 85, 85, 84, 84, 84, 84, 83, 83, 83, 82, 82, 81,
|
|
81, 81, 80, 80, 79, 79, 78, 78, 77, 77, 76, 76, 75, 75, 74, 74, 73, 72, 72,
|
|
71, 71, 70, 69, 68, 68, 67, 66, 66, 65, 64, 63, 63, 62, 61, 60, 59, 58, 58,
|
|
57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39,
|
|
38, 36, 35, 34, 33, 32, 30, 29, 28, 27, 25, 24, 23, 22, 20, 19, 17, 16, 15,
|
|
13, 12, 10, 9, 8, 6, 5, 3, 2};
|
|
|
|
uint8_t leading_zeros(uint8_t x) {
|
|
uint8_t res;
|
|
if ((x & 0xf0) == 0) {
|
|
x <<= 4;
|
|
res = 4;
|
|
} else {
|
|
res = 0;
|
|
}
|
|
if ((x & 0xc0) == 0) {
|
|
x <<= 2;
|
|
res += 2;
|
|
}
|
|
if ((x & 0x80) == 0) {
|
|
res += 1;
|
|
if (x == 0) {
|
|
res += 1;
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
pmf_logarithmic pmfl_from(uint8_t x) {
|
|
// calling with x == 0 is considered an error.
|
|
//
|
|
// In a first step convert to
|
|
// 0000_0eee_mmmm_mmmm
|
|
//
|
|
// Hereby mmmm_mmmm are the lower bits right from the first 1 in x
|
|
// An example with only four valid mantissa bits:
|
|
// x = 0001_mmmm => 0000_0100_mmmm_0000
|
|
//
|
|
// eee is the exponent
|
|
//
|
|
// The second convert this to the logarithm of x
|
|
// 1. Use mmmm_mmmm as index in the log2_minus_x_plus_one_shifted_by_1
|
|
// table
|
|
// 2. shift left 0000_0eee_mmmm_mmmm by 1
|
|
// 3. add the value from the log2_minus_x_plus_one_shifted_by_1 table
|
|
uint8_t leading = leading_zeros(x);
|
|
if (leading == 8) {
|
|
return PMF_CONST_INVALID;
|
|
}
|
|
x <<= leading + 1;
|
|
uint8_t e = 7 - leading;
|
|
uint8_t offset = pgm_read_byte_near(&log2_minus_x_plus_one_shifted_by_2[x]);
|
|
uint16_t res = (((uint16_t)e) << 8) | x;
|
|
res <<= 1;
|
|
offset += 1;
|
|
res += offset >> 1;
|
|
return res;
|
|
}
|
|
|
|
pmf_logarithmic pmfl_from(uint16_t x) {
|
|
uint8_t leading = leading_zeros(x >> 8);
|
|
if (leading == 8) {
|
|
return pmfl_from((uint8_t)x);
|
|
}
|
|
// shift msb out
|
|
x <<= leading + 1;
|
|
uint8_t exponent = 15 - leading;
|
|
x >>= 5;
|
|
uint8_t index = x >> 3;
|
|
uint8_t offset =
|
|
pgm_read_byte_near(&log2_minus_x_plus_one_shifted_by_2[index]);
|
|
// only with x & 7 > 2, the calculated constants are correct...
|
|
if ((x & 7) > 2) {
|
|
index++; // overflow to 0 is ok. index is an uint8_t
|
|
offset += pgm_read_byte_near(&log2_minus_x_plus_one_shifted_by_2[index]);
|
|
offset += 1;
|
|
} else {
|
|
offset <<= 1;
|
|
// offset += 1;
|
|
}
|
|
x += offset;
|
|
x >>= 2;
|
|
x += ((uint16_t)exponent) << 9;
|
|
return x;
|
|
}
|
|
pmf_logarithmic pmfl_from(uint32_t x) {
|
|
int16_t exp_offset;
|
|
uint16_t w;
|
|
if ((x & 0xff000000) == 0) {
|
|
if ((x & 0x00ff0000) == 0) {
|
|
w = (uint16_t)x;
|
|
exp_offset = 0;
|
|
} else if ((x & 0x00f00000) == 0) {
|
|
w = x >> 4;
|
|
exp_offset = 0x0800;
|
|
} else {
|
|
w = x >> 8;
|
|
exp_offset = 0x1000;
|
|
}
|
|
} else if ((x & 0xf0000000) == 0) {
|
|
w = x >> 12;
|
|
exp_offset = 0x1800;
|
|
} else {
|
|
w = x >> 16;
|
|
exp_offset = 0x2000;
|
|
}
|
|
return pmfl_from(w) + exp_offset;
|
|
}
|
|
uint16_t pmfl_to_u16(pmf_logarithmic x) {
|
|
if (x < 0) {
|
|
return 0;
|
|
}
|
|
if (x >= PMF_CONST_UINT16_MAX) {
|
|
return __UINT16_MAX__;
|
|
}
|
|
uint8_t exponent = ((uint16_t)x) >> 9;
|
|
x &= 0x01ff;
|
|
x += 0x200;
|
|
uint8_t index = ((uint16_t)x) >> 1;
|
|
x <<= 1;
|
|
uint8_t offset =
|
|
pgm_read_byte_near(&x_minus_pow2_of_x_minus_one_shifted_by_2[index]);
|
|
if ((x & 2) != 0) {
|
|
index++; // overflow to 0 is ok. index is an uint8_t
|
|
offset +=
|
|
pgm_read_byte_near(&x_minus_pow2_of_x_minus_one_shifted_by_2[index]);
|
|
offset >>= 1;
|
|
}
|
|
x -= offset;
|
|
if (exponent > 10) {
|
|
x <<= exponent - 10;
|
|
} else if (exponent < 10) {
|
|
x += (exponent != 9) ? 2 : 1;
|
|
x >>= 10 - exponent;
|
|
}
|
|
return x;
|
|
}
|
|
uint32_t pmfl_to_u32(pmf_logarithmic x) {
|
|
if (x < 0) {
|
|
return 0;
|
|
}
|
|
if (x >= PMF_CONST_UINT32_MAX) {
|
|
return __UINT32_MAX__;
|
|
}
|
|
uint8_t exponent = ((uint16_t)x) >> 9;
|
|
if (exponent < 0x10) {
|
|
return pmfl_to_u16(x);
|
|
}
|
|
uint8_t shift = exponent - 0x0f;
|
|
x = pmfl_shr(x, shift);
|
|
uint32_t res = pmfl_to_u16(x);
|
|
res <<= shift;
|
|
return res;
|
|
}
|
|
pmf_logarithmic pmfl_square(pmf_logarithmic x) {
|
|
if (x > 0x4000) {
|
|
return PMF_CONST_MAX;
|
|
}
|
|
if (x <= -0x4000) {
|
|
return PMF_CONST_MIN;
|
|
}
|
|
return x + x;
|
|
}
|