我已经尝试了几种做法,但最好的结果似乎也要比 printf() 系列函数慢 1.5 倍,这让我有些困惑。我认为问题在于我的设备是 32 位寻址的,而且没有 FPU。我试过几个“ftoa()”实现,把它们限制为只处理小数点左侧的 2 位数字,并给自己留下一些“面包屑”(线索),以便知道我正在拼接的整个字符串的总长度。归根结底,在 32 位系统上操作 8 位元素的数组,似乎会引入大量隐藏的移位、按位“或”和按位 NAND 操作,把速度拖慢到离谱的程度……
对于这种情况,大家有什么通用的建议吗?(除了把设计重构为 8.24 定点格式之外)我试过各种编译器优化选项,从“所见即所得”到专注于执行速度,似乎都没有哪一种能胜过 snprintf。
这是我试过的最快的:
/* Number of entries in the data FIFO: 8 when __DEBUG evaluates non-zero, 1024 otherwise. */
#if !(__DEBUG)
#define DATA_FIFO_SIZE (1024)
#else
#define DATA_FIFO_SIZE (8)
#endif
/* One entry of the data FIFO (see the data_fifo array). */
typedef struct
{
int32_t rval[4]; // four 32-bit signed values -- NOTE(review): presumably the raw readings; confirm
double cval[4]; // four double values; print2_and_idx() formats elements of this array as text
uint16_t idx; // 16-bit index value; print2_and_idx() prints it in decimal
uint16_t padding; //@attention explicit filler: the compiler was already inserting 2 bytes here to align to 32 bits
} data_fifo_entry;
// Error text that print2_and_idx() copies into embed_text when its arguments are out of range.
// With the explicit '\0' the literal is exactly 7 bytes, matching both the array size and that 7-byte copy.
const char V_ERR_MSG[7] = "ERROR,\0";
// FIFO storage of DATA_FIFO_SIZE entries; print2_and_idx() reads values from it.
static data_fifo_entry data_fifo[DATA_FIFO_SIZE];
// Text buffer that print2_and_idx() writes its formatted output into.
static char embed_text[256];
/****
* float to ASCII, adapted from
* https://stackoverflow.com/questions/2302969/how-to-implement-char-ftoafloat-num-without-sprintf-library-function-i#7097567
*
****/
//@attention the following floating point #defs are linked!!
#define MAX_DIGITS_TO_PRINT_FLOAT (6)
#define MAX_SUPPORTED_PRINTABLE_FLOAT (+999999.99999999999999999999999999)
#define MIN_SUPPORTED_PRINTABLE_FLOAT (-999999.99999999999999999999999999)
#define FLOAT_TEST6 (100000.0)
#define FLOAT_TEST5 (10000.0)
#define FLOAT_TEST4 (1000.0)
#define FLOAT_TEST3 (100.0)
#define FLOAT_TEST2 (10.0)
#define FLOAT_TEST1 (1.0)
/**
 * Convert a float to a signed decimal string without calling printf().
 *
 * Output always starts with an explicit '+' or '-'. Digits are produced by
 * truncation (no rounding) and generation stops as soon as the remainder is
 * no larger than the requested resolution, so trailing zeros are not emitted
 * and whole numbers carry no decimal point (30.0f -> "+030").
 * Special cases:
 *   - NaN                               -> "nan"
 *   - +/-infinity or |value| >= 1e6     -> "inf"
 *   - zero or |value| below resolution  -> "+0." / "-0." followed by one '0'
 *                                          per fractional digit
 *
 * NOTE(review): values >= 1 keep the leading zero that the original magnitude
 * table produced (1.5f -> "+01.5"); confirm whether that fixed-width look is
 * intended before changing it.
 *
 * @param s          destination buffer; at most 16 bytes are written including
 *                   the terminating NUL
 * @param f_in       value to convert
 * @param precision  number of fractional digits, 1..6; any other value is
 *                   treated as 4
 * @return length of the string written to s, not counting the NUL
 */
static inline int ftoa(char *s, const float f_in, const uint8_t precision)
{
    enum { DEFAULT_FRACTION_DIGITS = 4, MAX_FRACTION_DIGITS = 6 };
    /* 10^e for e = -6..+6, indexed by (e + MAX_FRACTION_DIGITS); replaces a
     * pow() call per digit, which is expensive without an FPU */
    static const double pow10_table[] = {
        1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6
    };
    /* smallest remainder still worth printing, indexed by (digits - 1) */
    static const float resolution[] = {
        0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001
    };

    /* unsupported precisions use the default everywhere, including the
     * zero-fill below, so the output length stays bounded */
    const int digits = ((precision >= 1) && (precision <= MAX_FRACTION_DIGITS))
                           ? (int)precision
                           : DEFAULT_FRACTION_DIGITS;
    const float f_p = resolution[digits - 1];
    float n = f_in;
    int length = 0;
    int neg;
    int m;

    if (isnan(n))
    {
        strcpy(s, "nan");
        return 3;
    }

    /* both ends of the printable range are rejected; the lower bound has to
     * be tested against n itself, not against -n */
    if (isinf(n) || (n >= MAX_SUPPORTED_PRINTABLE_FLOAT) ||
        (n <= MIN_SUPPORTED_PRINTABLE_FLOAT))
    {
        strcpy(s, "inf");
        return 3;
    }

    neg = (n < 0.0);
    if (neg)
    {
        n = -n;
    }

    /* zero, or too small to show at this precision: print an all-zero field */
    if (n < f_p)
    {
        int filled;
        s[length++] = neg ? '-' : '+';
        s[length++] = '0';
        s[length++] = '.';
        for (filled = 0; filled < digits; filled++)
        {
            s[length++] = '0';
        }
        s[length] = '\0';
        return length;
    }

    /* exponent of the first weight to try (one above the leading digit) */
    if (n >= FLOAT_TEST6)
    {
        m = 6;
    }
    else if (n >= FLOAT_TEST5)
    {
        m = 5;
    }
    else if (n >= FLOAT_TEST4)
    {
        m = 4;
    }
    else if (n >= FLOAT_TEST3)
    {
        m = 3;
    }
    else if (n >= FLOAT_TEST2)
    {
        m = 2;
    }
    else if (n >= FLOAT_TEST1)
    {
        m = 1;
    }
    else
    {
        m = 0;
    }

    s[length++] = neg ? '-' : '+';

    /* peel off one decimal digit per iteration, most significant first; the
     * lower bound on m keeps the table index valid */
    while ((m >= 0) || ((n > f_p) && (m >= -MAX_FRACTION_DIGITS)))
    {
        const double weight = pow10_table[m + MAX_FRACTION_DIGITS];
        /* n is never negative here, so truncation equals floor() */
        const int digit = (int)(n / weight);
        n -= (digit * weight);
        s[length++] = (char)('0' + digit);
        /* the decimal point is only written when a fraction follows */
        if ((m == 0) && (n > 0))
        {
            s[length++] = '.';
        }
        m--;
    }
    s[length] = '\0';
    return length;
} /* ftoa */
/**
 * Append two converted values of one FIFO entry, followed by the entry's idx
 * field, to embed_text as comma-terminated text fields.
 *
 * Writing starts at offset V_PREFIX_LENGTH. Each value is written by ftoa()
 * with precision 4 directly at the current position and the position is then
 * advanced by the length ftoa() reports, so fields are exactly as long as
 * their text: no NUL bytes or stale characters end up inside the line, and
 * the comma follows the value instead of overwriting its first character.
 * When any argument is out of range, V_ERR_MSG is copied to offset
 * V_PREFIX_LENGTH instead.
 *
 * @param idx1      index into cval[] of the first value, 0 <= idx1 < MAX_IDX
 * @param idx2      index into cval[] of the second value, 0 <= idx2 < MAX_IDX
 * @param fifo_idx  index into data_fifo[], fifo_idx < DATA_FIFO_SIZE
 */
static inline void print2_and_idx(int8_t idx1, int8_t idx2, uint16_t fifo_idx)
{
    //@attention the prefix is already in the buffer (10 characters per the original note), idx does NOT start at zero
    uint8_t idx = V_PREFIX_LENGTH;
    const int8_t selected[2] = {idx1, idx2};
    char *p_fifo_id;
    int field;

    /* fifo_idx is unsigned, so only its upper bound needs checking */
    if ((idx1 < 0) || (idx1 >= MAX_IDX) || (idx2 < 0) || (idx2 >= MAX_IDX) ||
        (fifo_idx >= DATA_FIFO_SIZE))
    {
        memcpy((void *)&embed_text[idx], (const void *)V_ERR_MSG, sizeof V_ERR_MSG);
        return;
    }

    for (field = 0; field < 2; field++)
    {
        /* ftoa() terminates its text with a NUL at embed_text[idx + written];
         * the comma below replaces that NUL */
        const int written = ftoa(&embed_text[idx],
                                 (float)data_fifo[fifo_idx].cval[selected[field]], 4);
        idx = (uint8_t)(idx + written);
        embed_text[idx++] = ',';
    }

    //!\todo maybe print the .idx as fixed width, zero pad to 5 digits
    p_fifo_id = utoa((char *)&embed_text[idx], (unsigned int)data_fifo[fifo_idx].idx, 10);
    idx += strlen(p_fifo_id);
    embed_text[idx++] = ',';
} /* print2_and_idx */
最佳答案
不要把浮点(FP)参数直接传给 *printf(),而是先将 FP 值转换为按比例缩放的整数。
这样仍然调用 snprintf(),但只传入整数和简单的字符参数;在我的测试中,这段代码比基线快大约 20 倍。
实际效果可能因平台而异(YMMV)。
// Baseline: snprintf() performs the floating-point formatting itself.
// Writes "{<pi>,<e>}" with an explicit sign and 4 fractional digits each;
// at most 22 bytes (including the NUL) are stored in mystr.
void format2double_1(char *mystr, double pi, double e) {
  enum { OUT_SIZE = 22 };
  (void) snprintf(mystr, OUT_SIZE, "{%+0.4f,%+0.4f}", pi, e);
}
void format2double_2(char *mystr, double pi, double e) {
int pi_i = (int) lrint(pi * 10000.0);
int api_i = abs(pi_i);
int e_i = (int) lrint(e * 10000.0);
int ae_i = abs(e_i);
snprintf(mystr, 22, "{%c%d.%04d,%c%d.%04d}", //
"+-"[pi_i < 0], api_i / 10000, api_i % 10000, //
"+-"[e_i < 0], ae_i / 10000, ae_i % 10000);
//puts(mystr);
}
[编辑]
若要让 -0.0 也输出正确的文本(带负号),请改用 "+-"[!!signbit(pi)] 来选择符号字符。
[编辑]
下面是一些可供 OP 参考、用来替换 ftoa() 的思路。核心代码是 lrint(f_in * fscale[precision]);,它同时完成缩放和舍入。(以下代码未经测试。)
// Exclusive magnitude bound used by ftoa_2(): the input and the resulting integer part must both be below it, otherwise ftoa_2() uses ftoa_1().
#define PRINTABLE_MAGNITUDE_LIMIT 1000000
// Reference conversion: format f_in with an explicit sign and `precision`
// fractional digits using sprintf().
// Returns what sprintf() returns: the number of characters stored in s (not
// counting the NUL), or a negative value if formatting failed. Using the
// return value avoids "%n", which some C libraries disable, and never
// returns an uninitialised count.
int ftoa_1(char *s, const float f_in, const uint8_t precision) {
  return sprintf(s, "%+.*f", precision, f_in);
}
// Fast conversion: scale f_in by 10^precision, round once with lrint(), then
// print the integer and fractional parts with integer-only conversions as
// "<sign><integer part>.<zero-padded fraction>", the same layout ftoa_1()
// produces.
// The fast path is taken only when precision is 1..6, |f_in| is below
// PRINTABLE_MAGNITUDE_LIMIT, and the scaled value fits in a long on every
// platform. Everything else, including NaN and infinities, is delegated to
// ftoa_1(), so an unsupported precision no longer indexes past the tables.
// Returns the number of characters stored in s, not counting the NUL.
int ftoa_2(char *s, const float f_in, const uint8_t precision) {
  static const float fscale[] = { 1, 10, 100, 1000, 10000, 100000, 1000000 };
  static const long iscale[] = { 1, 10, 100, 1000, 10000, 100000, 1000000 };

  // gross range check
  if (precision > 0 && precision < sizeof fscale / sizeof fscale[0]
      && f_in > -PRINTABLE_MAGNITUDE_LIMIT && f_in < PRINTABLE_MAGNITUDE_LIMIT) {
    const float scaled = f_in * fscale[precision];
    // A long is only guaranteed to hold +/-2147483647; lrint() on anything
    // larger is unspecified where long is 32 bits wide.
    if (scaled > -2147483648.0f && scaled < 2147483648.0f) {
      const long value = labs(lrint(scaled));
      const long scale = iscale[precision];
      const long ipart = value / scale;
      const long fpart = value % scale;
      // fine range check
      if (ipart < PRINTABLE_MAGNITUDE_LIMIT) {
        return sprintf(s, "%c%ld.%0*ld", signbit(f_in) ? '-' : '+', ipart,
            precision, fpart);
      }
    }
  }
  // Out of range values need not be of performance concern for now.
  return ftoa_1(s, f_in, precision);
}
[编辑]
若要把非负整数(正整数或 0)快速转换为字符串,并且既不需要移动缓冲区内容、也不需要事后反转缓冲区,请参见下面的代码。它还会返回字符串长度,方便后续继续拼接字符串。
// Write the decimal representation of u into dest (NUL-terminated) and
// return the number of digits written. dest must have room for every digit
// plus the terminator.
size_t utoa_length(char *dest, unsigned u) {
  // First pass: count how many decimal digits u has (at least one, for 0).
  size_t digits = 1;
  for (unsigned rest = u / 10; rest != 0; rest /= 10) {
    digits++;
  }

  // Second pass: fill the digits in from the least significant end.
  dest[digits] = '\0';
  for (size_t pos = digits; pos-- > 0; u /= 10) {
    dest[pos] = '0' + u % 10;
  }
  return digits;
}
关于c - 如何在 32 位单片机上执行得比 "snprintf(mystr, 22, "{%+0.4f,%+0.4f }", (double)3.14159265, (double) 2.718281828459);"更快,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46455293/