I know I could malloc the buffer memory, but I'm trying to avoid using the heap as I want the data transfer to be as fast as possible. This might be a misconception.
We can use various "pooling" techniques to get the benefits of heap allocation with the speed of a stack allocation:
- https://en.wikipedia.org/wiki/Slab_allocation
- https://en.wikipedia.org/wiki/Memory_pool
- https://en.wikipedia.org/wiki/Object_pool_pattern
I've used this technique in another answer: cs50 pset5 Speller optimisation
With some tweaking, after an initial allocation, virtually all buffer requests can be handled from the buffer pool (and rarely, if ever, go back to malloc for a fresh allocation).
Below is a slight generalization that should be very close to what you need. It compiles but has not been tested.
FILE: bufnew.h
// bufnew.h -- bufnew control
#ifndef _bufnew_h_
#define _bufnew_h_
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <stdatomic.h>
// capacity in bytes of the payload area of one cached buffer
enum { BUF_DATA_SIZE = 4096 };

// struct buffer -- one cached buffer (a node in a pool's free list)
struct buffer {
    unsigned int buf_opt;               // OPT_* flags for this buffer
    struct buffer *buf_next;            // next buffer in the free list
    char buf_data[BUF_DATA_SIZE];       // payload area
};
// option flags -- OPT_ALLOC is kept in a buffer's buf_opt; OPT_LOCK is tested
// against a pool's pool_opt
#define OPT_ALLOC 0x01u // 1=buffer is the start of a heap allocation
#define OPT_LOCK 0x02u // 1=take the pool mutex before growing the pool
// struct bufpool -- control block for one pool of cached buffers
// NOTE(review): pool_head is accessed with atomic_load and
// atomic_compare_exchange_strong but is not declared _Atomic; C11 specifies
// those generic functions for atomic types -- confirm the target compiler
// accepts this
struct bufpool {
unsigned int pool_opt; // OPT_* flags stored by pool_init
struct buffer *pool_head; // first buffer in the free list (NULL=empty)
pthread_mutex_t pool_lock; // taken by poolx_get when OPT_LOCK is set
};
// pool_init -- set up an empty pool; opt is stored as the pool's option flags
void
pool_init(struct bufpool *pool,unsigned int opt);
// pool_get -- pop the first buffer from the list at *hptr (NULL if empty)
struct buffer *
pool_get(struct buffer **hptr);
// pool_put -- push buf back onto the front of the pool's free list
void
pool_put(struct bufpool *pool,struct buffer *buf);
// poolq_get -- get a buffer, allocating one new buffer if the pool is empty
// (exits the program if the allocation fails)
struct buffer *
poolq_get(struct bufpool *pool);
// poolq_final -- free every buffer currently in the pool's free list
void
poolq_final(struct bufpool *pool);
// poolx_get -- get a buffer, growing the pool by a chunk of buffers if it is
// empty (exits the program if the allocation fails)
struct buffer *
poolx_get(struct bufpool *pool);
// poolx_final -- free every heap chunk whose first buffer is in the free list
void
poolx_final(struct bufpool *pool);
#endif
FILE: pool.c
// pool.c -- common functions for poolq/poolx
#include <bufnew.h>
// pool_init -- prepare a pool control block for use
//   pool: control block to initialize
//   opt:  option flags remembered in pool->pool_opt
void
pool_init(struct bufpool *pool,unsigned int opt)
{

    // mutex with default attributes
    pthread_mutex_init(&pool->pool_lock,NULL);

    // remember the caller's options and start with no cached buffers
    pool->pool_opt = opt;
    pool->pool_head = NULL;
}
// pool_get -- pop the first cached buffer off a free list
//   hptr: address of the list head pointer
// returns the buffer that was removed, or NULL when the list is empty
//
// NOTE(review): the exchange compares only the head pointer value, so this
// pop looks exposed to the classic ABA hazard if buffers are popped and
// pushed back concurrently -- confirm against the intended usage
struct buffer *
pool_get(struct buffer **hptr)
{
    struct buffer *top;

    for (;;) {
        top = atomic_load(hptr);

        // list is empty -- nothing to hand out
        if (top == NULL)
            return NULL;

        // unlink the first buffer; start over if the head moved meanwhile
        if (atomic_compare_exchange_strong(hptr,&top,top->buf_next))
            return top;
    }
}
// pool_put -- return a buffer to the front of the pool's free list
//   pool: pool that receives the buffer
//   buf:  buffer being given back
void
pool_put(struct bufpool *pool,struct buffer *buf)
{
    struct buffer **slot = &pool->pool_head;
    struct buffer *expected = atomic_load(slot);

    // link in front of the head we last saw; a failed exchange refreshes
    // "expected" with the current head, so just relink and try again
    do {
        buf->buf_next = expected;
    } while (! atomic_compare_exchange_strong(slot,&expected,buf));
}
FILE: poolq.c
// poolq.c -- quick/simple pool allocator
#include <bufnew.h>
// poolq_get -- hand out one buffer, going to the heap only when the pool is
// empty
//   pool: pool to take the buffer from
// returns a buffer (never NULL); on allocation failure the error is reported
// with perror and the program exits with status 1
struct buffer *
poolq_get(struct bufpool *pool)
{

    // fast path: reuse a cached buffer
    struct buffer *buf = pool_get(&pool->pool_head);
    if (buf != NULL)
        return buf;

    // pool is empty: get one fresh buffer from the heap
    buf = malloc(sizeof(*buf));
    if (buf == NULL) {
        perror("poolq_get/malloc");
        exit(1);
    }

    // a single buffer is its own heap allocation
    buf->buf_opt = OPT_ALLOC;
    buf->buf_next = NULL;

    return buf;
}
// poolq_final -- give every cached buffer back to the heap and empty the pool
//   pool: pool to tear down
//
// NOTE: only buffers that have been returned with pool_put are on the list,
// so anything still checked out is not released here
// NOTE(review): every list node is passed to free(), so this presumably must
// only be used on pools filled through poolq_get -- confirm
void
poolq_final(struct bufpool *pool)
{
#if 0
    pthread_mutex_lock(&pool->pool_lock);
#endif

    // walk the free list, releasing each buffer after saving its successor
    struct buffer *node = pool->pool_head;
    while (node != NULL) {
        struct buffer *after = node->buf_next;
        free(node);
        node = after;
    }

    pool->pool_head = NULL;

#if 0
    pthread_mutex_unlock(&pool->pool_lock);
#endif
}
FILE: poolx.c
// poolx.c -- full/smart pool allocator
#include <bufnew.h>
#define CHUNK 100
// poolx_get -- get a buffer (add more buffers if necessary)
//   pool: pool to take the buffer from
// returns a buffer popped from the pool's free list (never NULL)
//
// when the free list is empty, CHUNK buffers are obtained with a single
// malloc, pushed onto the list as a chain, and the pop is retried. if
// OPT_LOCK is set in pool->pool_opt, the growth step runs under
// pool->pool_lock. on malloc failure the error is reported with perror and
// the program exits with status 1
struct buffer *
poolx_get(struct bufpool *pool)
{
struct buffer **hptr = &pool->pool_head;
struct buffer *newbuf;
while (1) {
// try for uncontended grab
newbuf = pool_get(hptr);
if (newbuf != NULL)
break;
// we're out of free buffers -- prepare for new heap allocation
if (pool->pool_opt & OPT_LOCK)
pthread_mutex_lock(&pool->pool_lock);
// attempt to use the heap
// (the do/while(0) exists only so that "break" can skip the allocation and
// fall through to the unlock below)
do {
struct buffer *head = atomic_load(hptr);
// somebody put back a buffer between our pool_get call and our
// lock call:
// (1) either a simple pool_put intervened
// (2) another thread got the lock first
// NOTE: this is an "optimization" and not strictly necessary but
// it prevents [harmless] "excessive" allocation by racing threads
if (pool->pool_opt & OPT_LOCK) {
if (head != NULL)
break;
}
// to prevent always going to the heap, allocate several contiguous
// buffers at once
newbuf = malloc(sizeof(*newbuf) * CHUNK);
if (newbuf == NULL) {
perror("malloc");
exit(1);
}
// the last buffer
struct buffer *tail = &newbuf[CHUNK - 1];
// chain the buffers together in array order. only the first buffer keeps
// OPT_ALLOC (it is the address malloc returned); the flag is cleared for
// all the others. the tail's link is set to one past the array here and is
// overwritten before the chain is published below
unsigned int opt = OPT_ALLOC;
for (struct buffer *cur = newbuf; cur <= tail; ++cur) {
cur->buf_opt = opt;
opt &= ~OPT_ALLOC;
cur->buf_next = cur + 1;
}
// insert our chain of buffers at the front
// (a failed exchange refreshes "head" with the current list head, so the
// tail is relinked to it on the next pass)
while (1) {
// FIXME/CAE -- does this need atomic_store so it's visible to
// other threads before the exchange?
// NOTE(review): atomic_compare_exchange_strong is a memory_order_seq_cst
// operation, so the plain store should already be ordered before the new
// head becomes visible -- confirm before deleting the alternative
#if 1
tail->buf_next = head;
#else
atomic_store(&tail->buf_next,head);
#endif
if (atomic_compare_exchange_strong(hptr,&head,newbuf))
break;
}
} while (0);
if (pool->pool_opt & OPT_LOCK)
pthread_mutex_unlock(&pool->pool_lock);
// loop back: the buffer to return is always taken with pool_get, even right
// after this thread has grown the pool
}
return newbuf;
}
// poolx_final -- give all heap chunks back to the heap and empty the pool
//   pool: pool to tear down
//
// only a buffer flagged OPT_ALLOC is an address that came from malloc, so
// the free list is first reduced to just those buffers and then each one is
// freed
//
// NOTE: a chunk is released only if its first buffer is on the free list, so
// every buffer is expected to have been returned with pool_put beforehand
void
poolx_final(struct bufpool *pool)
{
#if 0
    pthread_mutex_lock(&pool->pool_lock);
#endif

    struct buffer *chunks = NULL;       // chain of heap allocation heads
    struct buffer **link = &chunks;     // where the next head gets attached
    struct buffer *after;

    // pass 1: thread the allocation heads together, skipping everything else
    for (struct buffer *node = pool->pool_head; node != NULL; node = after) {
        after = node->buf_next;

        // interior buffer of a chunk -- released along with its head
        if ((node->buf_opt & OPT_ALLOC) == 0)
            continue;

        *link = node;
        link = &node->buf_next;
    }
    *link = NULL;

    // pass 2: release each chunk to the heap
    while (chunks != NULL) {
        after = chunks->buf_next;
        free(chunks);
        chunks = after;
    }

    pool->pool_head = NULL;

#if 0
    pthread_mutex_unlock(&pool->pool_lock);
#endif
}
In the code above, I've used cpp conditionals to denote old vs. new code:
#if 0
// old code
#else
// new code
#endif
#if 1
// new code
#endif
Note: this can be cleaned up by running the file through unifdef -k