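The fast implementation uses only `#define` directives. The queue is accelerated in memory as long as the number of elements does not exceed the maximum value representable by the currently selected index type ($`2^{16}`$ for `unsigned short`). Because indices are wrapped with a bit mask (`index & (size - 1)`), the queue size must be a power of two.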
/*
 * Expands to an anonymous struct type describing a fixed-capacity queue.
 *
 *   type - type of the elements stored in the backing array.
 *   size - number of slots in the backing array.
 *
 * Members, in declaration order:
 *   buf  - backing storage of `size` elements.
 *   tail - unsigned counter used as the read position.
 *   head - unsigned counter used as the write position.
 *
 * Typical use: `pkr_fast_queue(int, 8) q = {{0}, 0, 0};`
 */
#define pkr_fast_queue(type, size) \
    struct { \
        type buf[size]; \
        unsigned int tail, head; \
    }
/*
 * Number of elements currently held in the queue, computed as head - tail.
 * Both counters are unsigned, so the difference stays correct after the
 * counters wrap around.
 * The argument is parenthesized so that expressions such as `*ptr` work.
 * Note: `queue` is evaluated twice.
 */
#define pkr_fq_elem_count(queue) ((queue).head - (queue).tail)
/*
 * Capacity of the queue: the number of slots in its `buf` array.
 * Evaluates to a compile-time constant of type size_t; `queue` must be the
 * queue object itself (or an expression yielding it, e.g. `*ptr`) so that
 * `buf` is still an array and not a decayed pointer.
 */
#define pkr_fq_size(queue) (sizeof((queue).buf) / sizeof((queue).buf[0]))
/*
 * Non-zero when the queue holds exactly as many elements as it has slots.
 * The argument is wrapped in parentheses before being forwarded so that
 * expression arguments such as `*ptr` survive expansion in the helpers.
 */
#define pkr_fq_full(queue) (pkr_fq_elem_count((queue)) == pkr_fq_size((queue)))
/*
 * Non-zero when the queue holds no elements, i.e. the read and write
 * counters are equal.
 * The argument is parenthesized so that expressions such as `*ptr` work.
 * Note: `queue` is evaluated twice.
 */
#define pkr_fq_empty(queue) ((queue).tail == (queue).head)
/*
 * Number of unused slots: capacity minus the current element count.
 * Uses pkr_fq_elem_count (the previous expansion named a non-existent
 * pkr_fq_count macro and could not compile when used).
 * The argument is wrapped in parentheses before being forwarded so that
 * expression arguments such as `*ptr` survive expansion in the helpers.
 */
#define pkr_fq_free(queue) (pkr_fq_size((queue)) - pkr_fq_elem_count((queue)))
/*
 * Appends `elem` at the write position and advances `head`.
 *
 * The slot index is `head & (size - 1)`, which only wraps correctly when
 * the queue size is a power of two.
 * No overflow check is performed here: pushing onto a full queue
 * overwrites the oldest stored element's slot. Check pkr_fq_full() first
 * when that matters.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in un-braced if/else); it must be followed by a semicolon.
 * `queue` is evaluated several times, `elem` exactly once.
 */
#define pkr_fq_push(queue, elem) \
    do { \
        (queue).buf[(queue).head & (pkr_fq_size((queue)) - 1)] = (elem); \
        (queue).head++; \
    } while (0)
/*
 * The element at the read position (an lvalue), without removing it.
 * The slot index is `tail & (size - 1)`, which only wraps correctly when
 * the queue size is a power of two.
 * No emptiness check is performed; on an empty queue this refers to a slot
 * that holds no live element.
 * The argument is parenthesized so that expressions such as `*ptr` work.
 */
#define pkr_fq_front(queue) ((queue).buf[(queue).tail & (pkr_fq_size((queue)) - 1)])
/*
 * Discards the element at the read position by advancing `tail`.
 * The element's value is not returned; read it with pkr_fq_front() first.
 * No emptiness check is performed here.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in un-braced if/else); it must be followed by a semicolon.
 */
#define pkr_fq_pop(queue) \
    do { \
        (queue).tail++; \
    } while (0)
/*
 * Empties the queue by resetting both counters to zero.
 * The contents of `buf` are left untouched.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in un-braced if/else); it must be followed by a semicolon.
 */
#define pkr_fq_flush(queue) \
    do { \
        (queue).tail = 0; \
        (queue).head = 0; \
    } while (0)