Environment variables substitution in C

287 views Asked by At

I have a string with the following content (UTF-8):

__$FOO ${FOO} ${FOO:def} ${FOO2:-тест}

And an environment variable FOO with the value test. My C application should work like GNU envsubst - replace all $FOO or ${FOO} entries with test - nothing complicated. Expected result:

__test test test тест

But... How can I do this using C only? I can't use something like exec or external (dynamic) libraries (my app is statically linked for use in a Docker scratch image).

I know about envsubst from gettext, but, at a minimum, it does not support default values.

I found libraries with all the required features in Go - stephenc/envsub and Rust - stephenc/envsub, but maybe someone knows how I can do the same in C? I don't want to reinvent something that has probably already been invented.

// Placeholder from the question: the wanted function takes the template
// string and should return it with the environment variables substituted.
static char *envsubst(char *str) {
  // magic (body intentionally left unimplemented in the question)
}
2

There are 2 answers

3
Paramtamtаm On BEST ANSWER

Since I could not find an answer, I decided to write my own parser. It has less functionality than I was looking for, but that was enough for my case:

#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "envsubst.h"

/**
 * Growable character buffer that is used as a C string.
 *
 * data - heap storage holding the characters written so far
 * len  - number of characters written so far
 * cap  - number of bytes currently accounted for in `data`
 */
struct buffer {
        char *data;
        unsigned int len, cap;
};

/**
 * Allocate an empty buffer whose storage is zero-filled.
 *
 * @param cap  initial capacity in bytes (0 is allowed)
 * @return the new buffer with len == 0, or NULL if memory could not be allocated;
 *         the caller owns both the struct and its `data`
 */
static struct buffer *newBuf(unsigned int cap) {
    struct buffer *b = malloc(sizeof *b);

    if (b == NULL) {
        return NULL;
    }

    // calloc zero-fills the storage; at least one byte is requested so that
    // `data` is a valid empty string even when cap == 0
    b->data = calloc(cap > 0 ? cap : 1, sizeof(char));

    if (b->data == NULL) {
        free(b);
        return NULL;
    }

    b->len = 0; // was left uninitialised before, yet it is read by every write
    b->cap = cap;

    return b;
}

/**
 * Reset the buffer to the empty state: zero the whole storage and set the
 * length back to 0. A buffer that holds no characters is left untouched.
 */
static void emptyBuf(struct buffer *buf) {
    if (buf->len == 0) {
        return; // nothing was written, nothing to wipe
    }

    memset(buf->data, '\0', buf->cap);
    buf->len = 0;
}

/**
 * Append one character to the buffer, growing the storage by 64 bytes when
 * fewer than two free bytes are left (one for the character, one that stays
 * zero so that `data` remains a NUL-terminated string).
 *
 * If the storage cannot be grown the character is dropped and the buffer is
 * left unchanged; nothing is ever written outside the allocated storage.
 *
 * @param buf  buffer to append to
 * @param c    character to append
 */
static void writeInBuf(struct buffer *buf, const char c) {
    if (buf->cap <= buf->len + 1) {
        unsigned int newCap = buf->cap + 64; // growing size

        if (newCap < buf->cap) {
            return; // the capacity counter would wrap around
        }

        // keep the old pointer until realloc is known to have succeeded
        char *grown = realloc(buf->data, newCap);

        if (grown == NULL) {
            return; // out of memory: the old storage is still valid
        }

        // zero the newly added tail so the string stays terminated
        memset(grown + buf->cap, '\0', newCap - buf->cap);

        buf->data = grown;
        buf->cap = newCap;
    }

    buf->data[buf->len++] = c;
}

/**
 * Append every character of the NUL-terminated string `str` to the buffer,
 * one writeInBuf() call per character. The terminator itself is not written.
 */
static void writeStringInBuf(struct buffer *buf, const char *str) {
    const char *cursor = str;

    while (*cursor != '\0') {
        writeInBuf(buf, *cursor);
        cursor++;
    }
}

/**
 * Release a buffer together with its storage. NULL is accepted and ignored.
 */
static void freeBuf(struct buffer *buf) {
    if (buf != NULL) {
        free(buf->data);
        free(buf);
    }
}

/**
 * Parse the string and replace patterns in format `${ENV_NAME:-default_value}` with
 * the values from the environment (or default values after `:-` if provided).
 *
 * Details of the supported syntax:
 *  - only the braced form is recognised, a bare `$NAME` is copied unchanged;
 *  - the default is used only when the variable is not set at all, a variable
 *    that is set to an empty value expands to the empty string;
 *  - a `${` met inside a pattern is not expanded: it only takes part in the
 *    brace matching and the text after it is appended to the variable name;
 *  - a pattern that is not closed before the end of the input is dropped;
 *  - input shorter than 4 characters is returned verbatim, without parsing.
 *
 * @param str  NUL-terminated template string
 * @return a newly allocated string that the caller must free(), or NULL when
 *         `str` is NULL or memory could not be allocated
 */
char *envsubst(const char *str) {
    if (str == NULL) {
        return NULL;
    }

    size_t strLen = strlen(str);

    if (strLen < 4) {
        // Return a copy instead of the argument itself, so the result is
        // always heap memory owned by the caller, whatever the input length.
        char *copy = malloc(strLen + 1);

        if (copy != NULL) {
            memcpy(copy, str, strLen + 1);
        }

        return copy;
    }

    struct buffer *result = newBuf(strLen);
    struct buffer *envName = newBuf(32);
    struct buffer *envDef = newBuf(32);
    char *data = NULL;

    if (result != NULL && envName != NULL && envDef != NULL) {
        enum {
                DATA,
                ENV_NAME,
                ENV_DEFAULT,
        } state = DATA;
        unsigned int nested = 0; // number of currently open `${`

        for (size_t i = 0; str[i] != '\0'; i++) {
            const char c = str[i];

            if (c == '$' && str[i + 1] == '{') {
                // pattern start; skip both characters
                i++;
                nested++;
                state = ENV_NAME;

                continue;
            }

            if (state == ENV_NAME && c == ':' && str[i + 1] == '-') {
                // separator between the name and the default value
                i++;
                state = ENV_DEFAULT;

                continue;
            }

            if (state != DATA && c == '}') {
                nested--;

                if (nested == 0) {
                    // outermost closing brace: emit the value exactly once
                    const char *envVar = getenv(envName->data);

                    if (envVar) {
                        writeStringInBuf(result, envVar);
                    } else if (envDef->len > 0) {
                        writeStringInBuf(result, envDef->data);
                    }

                    emptyBuf(envName);
                    emptyBuf(envDef);
                    state = DATA;

                    continue;
                }

                // an inner `}` is kept as a part of the name / default below
            }

            switch (state) {
                case ENV_NAME:
                    writeInBuf(envName, c);
                    break;

                case ENV_DEFAULT:
                    writeInBuf(envDef, c);
                    break;

                case DATA:
                default:
                    writeInBuf(result, c);
                    break;
            }
        }

        // hand the storage over to the caller, only the wrapper is freed below
        data = result->data;
        result->data = NULL;
    }

    freeBuf(envName);
    freeBuf(envDef);
    freeBuf(result);

    return data;
}

And the tests:

#include <assert.h>

// tests running: `gcc -o ./tmp/subs ./src/envsubst.c && ./tmp/subs`
int main() {
    // Every case pairs a template with the exact text envsubst must produce.
    static const struct {
        const char *input;
        const char *expected;
    } cases[] = {
        {
            "__$_UNSET_VAR_ ${_UNSET_VAR_} ${_UNSET_VAR_:-default value }",
            "__$_UNSET_VAR_  default value "
        },
        {
            "${__#Test_2} ${__#Test_2:-foo}${_UNSET_VAR_:-def}${__#Test_2}",
            " def"
        },
        {
            "${Test_1} ${Test_1:-def}${Test_1}",
            "foo foofoo"
        },
        {
            "__$FOO ${bar} $FOO:def ${Test_1:-def} ${Test_1} ${_UNSET_VAR_:-default} bla-bla ${FOO2:-тест}${ABC} ${}${}",
            "__$FOO  $FOO:def foo foo default bla-bla тест "
        },
        {
            "${_UNSET_VAR_:-${Test_1}}",
            ""
        },
        {
            "aaa ${}} ${${} bbb",
            "aaa } "
        },
    };

    // one variable with a value and one that is set but empty
    putenv("Test_1=foo");
    putenv("__#Test_2=");

    for (unsigned int k = 0; k < sizeof(cases) / sizeof(cases[0]); k++) {
        assert(strcmp(envsubst(cases[k].input), cases[k].expected) == 0);
    }
}
1
Rachid K. On

As you need shell substitution, you can implicitly run a shell from your program through libc services like system() or popen(). Those functions fork/exec a shell, and the environment of the caller is inherited by the child process (execve() is used internally). This may appear cumbersome from a performance point of view, but it does the job:

#include <stdio.h>

// Non reentrant function as it returns a pointer onto a global static buffer
//
// Runs `echo <str>` through popen() and returns what the shell printed, so the
// shell performs the variable substitution. The same static buffer holds first
// the command and then its output, hence at most sizeof(cmd) - 1 bytes of
// output are returned.
//
// Returns an empty string when str is NULL, when the command does not fit in
// the buffer, or when the shell cannot be started.
//
// WARNING: str is pasted into a shell command line without any escaping, so
// every shell metacharacter in it is interpreted (command injection). Never
// pass untrusted input to this function.
static char *envsubst(char *str) {
  FILE *f;
  static char cmd[256];
  size_t rc;
  int len;

  cmd[0] = '\0';
  if (str == NULL) {
    return cmd;
  }

  len = snprintf(cmd, sizeof(cmd), "echo %s", str);
  if (len < 0 || (size_t) len >= sizeof(cmd)) {
    // refuse to run a truncated (and therefore different) command
    cmd[0] = '\0';
    return cmd;
  }

  f = popen(cmd, "r");
  if (f == NULL) {
    cmd[0] = '\0';
    return cmd;
  }

  // leave room for the terminator: reading sizeof(cmd) bytes would make
  // cmd[rc] point one past the end of the array
  rc = fread(cmd, 1, sizeof(cmd) - 1, f);
  cmd[rc]='\0';
  pclose(f);

  return cmd;
}

// Expands the single command-line argument with envsubst() and prints the
// result; does nothing when the argument count is not exactly one.
int main(int ac, char *av[])
{
  if (ac != 2) {
    return 0;
  }

  printf("%s", envsubst(av[1]));
  return 0;
}

popen() calls /bin/sh. The code above works if /bin/sh or /usr/bin/sh points to bash and not to an esoteric shell like dash, which does not understand this kind of variable substitution.

$ ./envp '__$FOO ${FOO} ${FOO:def} ${FOO2:-TECT}'
sh: 1: Bad substitution
$ ls -l /bin/sh
lrwxrwxrwx 1 root root 4 août  20 20:30 /bin/sh -> dash
$ sudo rm /bin/sh
$ sudo ln -s bash /bin/sh
$ ls -l /bin/sh
lrwxrwxrwx 1 root root 4 août  20 20:36 /bin/sh -> bash
$ export FOO=test
$ export FOO2=test2
$ ./envp '__$FOO ${FOO} ${FOO:def} ${FOO2:-TECT}'
__test test test test2
$ unset FOO2
$ ./envp '__$FOO ${FOO} ${FOO:def} ${FOO2:-TECT}'
__test test test TECT
$ unset FOO 
$ ./envp '__$FOO ${FOO} ${FOO:def} ${FOO2:-TECT}'
__ TECT

PS: Reading again your question, you mention that you can't use external libs as you are statically linked. So, I am not sure if you are able to use the libc and its popen()/pclose() services...