Some will not like that fp64lib is not integrated with gcc, resulting in some code that does not reflect the “natural” mathematical notation with +, -, * and /, but create statements in it like
float64_t t = fp64_inverse(n);
x = fp64_add( x, t );
How much “easier” would it be if we could just write
x = x + 1/n
There is a way. As the Arduino platform supports C++, you could wrap fp64lib with some operator functions. The following code provides a minimal wrapper, located in the include file Double.h
:
class Double {
public:
Double() { x = 0ULL; }
Double( double f ) { x = fp64_sd( f ); }
Double( float f ) { x = fp64_sd( f ); }
Double( float64_t f ) { x = f; }
Double( int16_t n ) { x = fp64_int16_to_float64( n ); }
Double( uint16_t n ) { x = fp64_uint16_to_float64( n );}
Double( int32_t n ) { x = fp64_int32_to_float64( n ); }
Double( char *s ) { x = fp64_atof( s ); }
Double( const Double& d ) { x = d.x; }
const char* toString() {
return fp64_to_string( x, 17, 15);
}
const char* toString( int prec ) {
return fp64_to_string( x, prec+2, prec);
}
float64_t data() {
return this->x;
}
Double operator+=( const Double& y ) {
this->x = fp64_add( this->x, y.x );
return *this ;
}
Double operator+( const Double& y ) {
Double res;
res.x = fp64_add( this->x, y.x );
return res;
}
Double operator-=( const Double& y ) {
this->x = fp64_sub( this->x, y.x );
return *this;
}
Double operator-( const Double& y ) {
Double res;
res.x = fp64_sub( this->x, y.x );
return res;
}
Double operator-() {
Double res;
res.x = fp64_sub( float64_NUMBER_PLUS_ZERO, this->x );
return res;
}
Double operator*=( const Double& y ) {
this->x = fp64_mul( this->x, y.x );
return *this;
}
Double operator*( const Double& y ) {
Double res;
res.x = fp64_mul( this->x, y.x );
return res;
}
Double operator/=( const Double& y ) {
this->x = fp64_div( this->x, y.x );
return *this;
}
Double operator/( const Double& y ) {
Double res;
res.x = fp64_div( this->x, y.x );
return res;
}
Double operator%=( const Double& y ) {
this->x = fp64_fmod( this->x, y.x );
return *this;
}
Double operator%( const Double &y ) {
Double res;
res.x = fp64_fmod( this->x, y.x );
return res;
}
bool operator<( const Double &y ) {
return fp64_compare( this->x, y.x ) < 0;
}
bool operator>( const Double &y ) {
return fp64_compare( this->x, y.x ) > 0;
}
bool operator<=( const Double &y ) {
return fp64_compare( this->x, y.x ) <= 0;
}
bool operator>=( const Double &y ) {
return fp64_compare( this->x, y.x ) >= 0;
}
bool operator==( const Double &y ) {
return fp64_compare( this->x, y.x ) == 0;
}
bool operator!=( const Double &y ) {
return fp64_compare( this->x, y.x ) != 0;
}
static Double pow( const Double &x, const Double &y ) {
Double res;
res.x = fp64_pow( x.x, y.x );
return res;
}
static Double sqrt( const Double &x ) {
Double res;
res.x = fp64_sqrt( x.x );
return res;
}
static Double pi() {
return Double(float64_NUMBER_PI);
}
static Double log( const Double &x ) {
Double res;
res.x = fp64_log( x.x );
return res;
}
static Double exp( const Double &x ) {
Double res;
res.x = fp64_exp( x.x );
return res;
}
private:
float64_t x;
};
Basically, with that wrapper, a lot of math algorithms can be written in the usual algebraic notation. Using the wrapper, the Leibniz series example can be rewritten into:
#define MAX_COUNT 1000 // # of iterations before printing another approximation
Double x = Double( 1.0 );
Double n = Double( 3.0 );
Double delta = Double( 2.0 );
bool add = false;
void setup() {
Serial.begin(57600);
}
void loop() {
int cnt = 0;
while(1) {
if( add )
x = x + Double(1.0)/n;
else
x = x - Double(1.0)/n;
add = !add;
if( ++cnt == MAX_COUNT ) {
Serial.print( n.toString() );
Serial.print(": ");
Serial.print( x.toString() );
Serial.print("\t");
Double pi = x * Double(4.0);
Serial.println( pi.toString() );
cnt = 0;
}
n = n + Double(2.0);
}
}
Looks nice! But as in real life, beauty comes with a price. And here it comes with a double price: you need more memory (2328 instead of 2212 bytes of flash memory) and you need more execution time: 98,475s instead of 91,682s until n=100001.
But especially for more complex calculations, the benfit of writing “simpler” code, might outweight the penalty of increased memory and time. Here an example of the more complex Gamma-Function, the generalization of the factorial function n! = 1 * 2 * 3 * … * (n-1) * n, with n! = Gamma(n+1).
#include <Double.h>
/*
* Example on how to use the Double wrapper class and compare it to
* native usage of fp64lib.
*
*/
float64_t fp64_gamma1(float64_t zpar) {
const static float64_t par[9] = {
((float64_t)0x3feffffffffff950LLU), // 0.99999999999980993
((float64_t)0x40852429b6c30b05LLU), // 676.5203681218851
((float64_t)0xc093ac8e8ed4171bLLU), //-1259.1392167224028
((float64_t)0x40881a9661d3b4d8LLU), // 771.32342877765313
((float64_t)0xc06613ae51a32f5dLLU), // -176.61502916214059
((float64_t)0x402903c27f8b9c81LLU), // 12.507343278686905
((float64_t)0xbfc1bcb2992b2855LLU), // -0.13857109526572012
((float64_t)0x3ee4f0514e4e324fLLU), // 9.9843695780195716E-6
((float64_t)0x3e8435508f3faeefLLU) // 1.5056327351493116E-7
};
const static float64_t sqrt2pi = ((float64_t)0x40040d931ff62705LLU); // sqrt(2*pi) = 2.5066282746310002
const static float64_t f_NUMBER_05 = ((float64_t)0x3fe0000000000000LLU); //0.5
const static float64_t f_NUMBER_65 = ((float64_t)0x401a000000000000LLU); //6.5
float64_t tt = fp64_add(zpar,f_NUMBER_65);
float64_t dummy = par[0];
float64_t dummy2 = zpar;
for( byte i = 1; i < 9; i++ ) {
dummy = fp64_add(dummy,fp64_div(par[i],dummy2));
dummy2 = fp64_add(dummy2, float64_NUMBER_ONE);
}
dummy = fp64_mul(dummy, fp64_mul(sqrt2pi, fp64_mul(fp64_pow(tt,fp64_sub(zpar,f_NUMBER_05)),fp64_exp(fp64_neg(tt)))));
return dummy;
}
// gamma function (lanczos approximation)
// sources: https://en.wikipedia.org/wiki/Lanczos_approximation
// https://rosettacode.org/wiki/Gamma_function
// Using Double wrapper class
Double Dbl_gamma1(Double zpar) {
const static Double par[9] = {
Double("0.99999999999980993"),
Double("676.5203681218851"),
Double("-1259.1392167224028"),
Double("771.32342877765313"),
Double("-176.61502916214059"),
Double("12.507343278686905"),
Double("-0.13857109526572012"),
Double("9.9843695780195716E-6"),
Double("1.5056327351493116E-7")
};
Double tt = zpar + Double(6.5);
Double res = par[0];
Double dummy2 = zpar;
for( byte i = 1; i < 9; i++ ) {
res += par[i] / dummy2;
dummy2 += Double(1);
}
res *= Double::sqrt( Double(2)*Double::pi() );
res *= Double::pow(tt,zpar-Double(0.5)) * Double::exp(-tt);
return res;
}
void setup() {
Serial.begin(57600);
Serial.println( "First with direct calls to fp64lib" );
for( int i = 0; i < 11; i++ ) {
float64_t x = fp64_int32_to_float64(i);
float64_t res = fp64_gamma1(x);
Serial.print( "gamma(" ); Serial.print(i); Serial.print(")=");
Serial.println( fp64_to_string(res, 15, 13) );
}
Serial.println();
Serial.println( "Now with wrapper class Double" );
for( int i = 0; i < 11; i++ ) {
Double x = Double(i);
Double res = Dbl_gamma1(x);
Serial.print( "gamma(" ); Serial.print(i); Serial.print(")=");
Serial.println( res.toString(13) );
}
Serial.println();
Serial.println( "Now with some non-integer values" );
Double x = Double(1.0);
for( int i = 0; i < 9; i++ ) {
x += Double("0.1");
Double res = Dbl_gamma1(x);
Serial.print( "gamma(" ); Serial.print(x.toString(4)); Serial.print(")=");
Serial.println( res.toString(13) );
}
Serial.println();
Serial.println( "and with some negative values" );
x = Double(-1.0);
for( int i = 0; i < 9; i++ ) {
x -= Double("0.1");
Double res = Dbl_gamma1(x);
Serial.print( "gamma(" ); Serial.print(x.toString(4)); Serial.print(")=");
Serial.println( res.toString(13) );
}
}
void loop() {
}
Example output
First with direct calls to fp64lib
gamma(0)=+INF
gamma(1)=1
gamma(2)=1
gamma(3)=2
gamma(4)=6
gamma(5)=24
gamma(6)=120
gamma(7)=720
gamma(8)=5040
gamma(9)=40320
gamma(10)=362880
Now with wrapper class Double
gamma(0)=+INF
gamma(1)=1
gamma(2)=1
gamma(3)=2
gamma(4)=6
gamma(5)=24
gamma(6)=120
gamma(7)=720
gamma(8)=5040
gamma(9)=40320
gamma(10)=362880
Now with some non-integer values
gamma(1.1)=0.9513507698669
gamma(1.2)=0.9181687423998
gamma(1.3)=0.8974706963063
gamma(1.4)=0.8872638175031
gamma(1.5)=0.8862269254528
gamma(1.6)=0.8935153492877
gamma(1.7)=0.9086387328533
gamma(1.8)=0.9313837709802
gamma(1.9)=0.9617658319074
and with some negative values
gamma(-1.1)=9.7148063829042
gamma(-1.2)=4.850957140523
gamma(-1.3)=3.3283470067895
gamma(-1.4)=2.659271872881
gamma(-1.5)=2.3632718012086
gamma(-1.6)=2.3105828580826
gamma(-1.7)=2.5139235190678
gamma(-1.8)=3.1880859111149
gamma(-1.9)=5.5634547945545