commit 6514358e6197e0802f517adaaf777118de1fbcc7
Author: Randy Jordan
Date:   Wed Apr 8 19:39:59 2026 -0500

    Initial commit

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..8aa2645
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Randy Jordan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f1b409e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,94 @@
+# Compiler Flags
+CC := gcc
+CFLAGS := -g -Wall -Wextra -Werror -pedantic -fno-omit-frame-pointer
+
+# Directory variables
+LIBDIR := lib
+OBJ := obj
+INC := include
+SRC := src
+TEST := tests
+
+# Filepath Pattern Matching
+LIB := $(LIBDIR)/lib.a
+SRCS := $(wildcard $(SRC)/*.c)
+OBJS := $(patsubst $(SRC)/%.c, $(OBJ)/%.o, $(SRCS))
+TESTS := $(wildcard $(TEST)/*.c)
+TESTBINS := $(patsubst $(TEST)/%.c, $(TEST)/bin/%, $(TESTS))
+
+# These targets name commands, not files, so they must be declared PHONY
+.PHONY: all release clean test
+
+# Release build: clean, then rebuild the library with the release CFLAGS.
+# NOTE(review): `release` is the first rule, so a bare `make` runs it, not `all`.
+release: CFLAGS := -Wall -Wextra -Werror -pedantic -fsanitize=address,undefined -fno-omit-frame-pointer -O2 -DNDEBUG
+release: clean $(LIB)
+
+# Target for compilation.
+all: $(LIB)
+
+# Archive the objects into the static library.
+$(LIB): $(OBJS) | $(LIBDIR)
+	$(RM) $(LIB)
+	ar -cvrs $@ $^
+
+# Object whose header lives in $(INC): rebuild when that header changes.
+$(OBJ)/%.o: $(SRC)/%.c $(INC)/%.h | $(OBJ)
+	$(CC) $(CFLAGS) -c $< -o $@
+
+# Object whose header sits next to its source.
+$(OBJ)/%.o: $(SRC)/%.c $(SRC)/%.h | $(OBJ)
+	$(CC) $(CFLAGS) -c $< -o $@
+
+# Object with no matching header.
+$(OBJ)/%.o: $(SRC)/%.c | $(OBJ)
+	$(CC) $(CFLAGS) -c $< -o $@
+
+# Each test source becomes one executable linked against the library.
+$(TEST)/bin/%: $(TEST)/%.c $(LIB) | $(TEST)/bin
+	$(CC) $(CFLAGS) $< $(LIB) -o $@
+
+# Make directories if none.
+$(LIBDIR):
+	mkdir -p $@
+
+$(INC):
+	mkdir -p $@
+
+$(OBJ):
+	mkdir -p $@
+
+$(TEST)/bin:
+	mkdir -p $@
+
+# Run the tests in the bin folder and track results.
+# NOTE: `date +%s%N` (nanoseconds) is a GNU date extension.
+test: $(LIB) $(TESTBINS)
+	@SUCCESS=0; FAILURE=0; \
+	RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m'; \
+	for t in $(TESTBINS); do \
+		NAME=$$(basename $$t); \
+		START=$$(date +%s%N); \
+		if $$t; then \
+			RET=0; \
+		else \
+			RET=$$?; \
+		fi; \
+		END=$$(date +%s%N); \
+		ELAPSED_NS=$$((END - START)); \
+		ELAPSED_MS=$$((ELAPSED_NS / 1000000)); \
+		if [ $$RET -eq 0 ]; then \
+			printf "%-20s %bPASS%b (%b%4d ms%b)\n" "$$NAME" "$$GREEN" "$$NC" "$$YELLOW" "$$ELAPSED_MS" "$$NC"; \
+			SUCCESS=$$((SUCCESS + 1)); \
+		else \
+			printf "%-20s %bFAIL%b (%b%4d ms%b)\n" "$$NAME" "$$RED" "$$NC" "$$YELLOW" "$$ELAPSED_MS" "$$NC"; \
+			FAILURE=$$((FAILURE + 1)); \
+		fi; \
+	done; \
+	printf "\nTests completed\n"; \
+	printf "SUCCESS: %b%d%b\n" "$$GREEN" "$$SUCCESS" "$$NC"; \
+	printf "FAILURE: %b%d%b\n" "$$RED" "$$FAILURE" "$$NC"; \
+	test $$FAILURE -eq 0
+
+clean:
+	$(RM) -r $(LIBDIR) $(OBJ) $(TEST)/bin/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8dfd6f2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,81 @@
+# spp
+
+## Description
+SPP (Single Pass Parser) is a minimal, zero-allocation parsing utility for C
+that operates directly on a character stream. It provides a small set of
+composable primitives for building fast, simple parsers without backtracking.
+
+The library is designed around the idea of consuming input in a single forward
+pass, making it ideal for tokenization, lightweight parsing, and embedded
+systems.
+
+## Table of Contents
+
+* [Features](#features)
+* [Todos](#todos)
+* [Usage](#usage)
+* [Acknowledgments](#acknowledgments)
+* [License](#license)
+
+## Features
+
+* Single-pass, forward-only parsing
+* Zero allocations
+* Tiny API surface
+* Composable primitives
+* Works directly on `const char *` streams
+
+## Todos
+
+* [ ] Add new feature X
+* [ ] Improve documentation
+* [ ] Write tests
+
+## Usage
+```c
+// Returns true if the current character is in the provided character set.
+bool spp_is(struct Stream s, const char *list);
+
+// Returns true if the stream has reached the terminating '\0'.
+bool spp_eof(struct Stream s);
+
+// Consumes and returns the current stream character if it is in the list.
+char spp_take(struct Stream s, const char *list);
+
+// Consumes characters while they belong to the set, e.g. to skip whitespace.
+uintptr_t spp_skip(struct Stream s, const char *list);
+
+// Consumes characters until a character from the set is encountered.
+uintptr_t spp_until(struct Stream s, const char *list);
+
+// Returns current cursor position of the stream.
+const char *spp_cursor(struct Stream s);
+
+```
+
+## Tokenizing Words
+
+```c
+while (!spp_eof(s)) {
+    spp_skip(s, WS);
+
+    const char *start = spp_cursor(s);
+    uintptr_t len = spp_until(s, WS);
+
+    if (len > 0) {
+        printf("Token: %.*s\n", (int)len, start);
+    }
+}
+```
+
+## Acknowledgments
+
+[Tom Preston-Werner README Driven Development](https://tom.preston-werner.com/2010/08/23/readme-driven-development)
+[Make a README](https://www.makeareadme.com/)
+[Choose a LICENSE](https://choosealicense.com/)
+
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
diff --git a/include/spp.h b/include/spp.h
new file mode 100644
index 0000000..6d105e0
--- /dev/null
+++ b/include/spp.h
@@ -0,0 +1,43 @@
+#ifndef SPP_INCLUDED
+#define SPP_INCLUDED
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+// Character sets as strings
+#define WS "\t\n\v\f\r "
+#define NUM "0123456789"
+#define ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define ALNUM "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+
+// Macros to check if a character belongs to a set.
+// Caveat: strchr() also matches the terminating '\0', so each macro is true
+// for c == '\0'.
+#define IS_WS(c) (strchr(WS, (c)) != NULL)
+#define IS_DIGIT(c) (strchr(NUM, (c)) != NULL)
+#define IS_ALPHA(c) (strchr(ALPHA, (c)) != NULL)
+#define IS_ALNUM(c) (strchr(ALNUM, (c)) != NULL)
+
+// Handle to a caller-owned cursor: the spp_* functions advance *content in
+// place, so every copy of the struct observes the same position.
+struct Stream {
+    const char **content;
+};
+typedef struct Stream Stream;
+
+extern bool spp_is(struct Stream s, const char *list);
+extern bool spp_eof(struct Stream s);
+extern char spp_take(struct Stream s, const char *list);
+extern uintptr_t spp_skip(struct Stream s, const char *list);
+extern uintptr_t spp_until(struct Stream s, const char *list);
+extern const char *spp_cursor(struct Stream s);
+
+// Token callback for spp_parse(): returns true after storing the token's
+// start and length through the out-parameters, false when no token was read.
+typedef bool (*parse)(struct Stream s, const char **start, ptrdiff_t *len);
+extern uintptr_t spp_parse(struct Stream s, parse fn, const char *buf[], ptrdiff_t cnt[], uintptr_t cap );
+
+
+#endif //spp.h
diff --git a/src/spp.c b/src/spp.c
new file mode 100644
index 0000000..7b9beba
--- /dev/null
+++ b/src/spp.c
@@ -0,0 +1,97 @@
+#include "../include/spp.h"
+
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+
+// Returns true if the current character is a member of `list`.
+// The end-of-stream '\0' is never a member: strchr() alone would match the
+// terminator of `list` and report a hit at EOF.
+bool spp_is(const struct Stream s, const char *list) {
+    assert(list != NULL);
+    assert(s.content != NULL && *s.content != NULL);
+
+    const char c = **s.content;
+    return c != '\0' && strchr(list, c) != NULL;
+}
+
+// Returns true once the cursor sits on the terminating '\0'.
+bool spp_eof(const struct Stream s) {
+    assert(s.content != NULL && *s.content != NULL);
+
+    return **s.content == '\0';
+}
+
+// Consumes and returns the current character if it is in `list`; otherwise
+// returns 0 and leaves the cursor where it was (including at EOF).
+char spp_take(const struct Stream s, const char *list) {
+    if (!spp_is(s, list)) {
+        return 0;
+    }
+
+    const char taken = **s.content;
+    (*s.content)++;
+    return taken;
+}
+
+// Consumes characters while they belong to `list`; returns how many.
+uintptr_t spp_skip(const struct Stream s, const char *list) {
+    uintptr_t size = 0;
+    for (; !spp_eof(s) && spp_is(s, list); size++) {
+        (*s.content)++;
+    }
+
+    return size;
+}
+
+// Consumes characters until one from `list` or EOF is reached; returns how many.
+uintptr_t spp_until(const struct Stream s, const char *list) {
+    uintptr_t size = 0;
+    for (; !spp_eof(s) && !spp_is(s, list); size++) {
+        (*s.content)++;
+    }
+
+    return size;
+}
+
+// Returns the current cursor position of the stream.
+const char *spp_cursor(struct Stream s) {
+    assert(s.content != NULL);
+
+    return *s.content;
+}
+
+// Calls `fn` repeatedly until EOF or until `cap` tokens have been stored.
+// Token i's start is written to buf[i] and its length to cnt[i].
+// Returns the number of tokens stored.
+uintptr_t spp_parse(struct Stream s, parse fn, const char *buf[], ptrdiff_t cnt[], uintptr_t cap) {
+    assert(fn != NULL);
+    assert(buf != NULL);
+    assert(cnt != NULL);
+
+    uintptr_t n = 0;
+
+    while (!spp_eof(s) && n < cap) {
+        const char *start = NULL;
+        ptrdiff_t len = 0;
+
+        const char *before = spp_cursor(s);
+
+        if (!fn(s, &start, &len)) {
+            // Prevent infinite loop if callback fails to consume
+            if (spp_cursor(s) == before) {
+                break;
+            }
+            continue;
+        }
+
+        buf[n] = start;
+        cnt[n] = len;
+        n++;
+    }
+
+    return n;
+}
+
+
diff --git a/tests/01_basic_example.c b/tests/01_basic_example.c
new file mode 100644
index 0000000..08790f3
--- /dev/null
+++ b/tests/01_basic_example.c
@@ -0,0 +1,36 @@
+#include "../include/spp.h"
+#include <stdio.h>
+
+
+// Token callback: skips leading whitespace, then reports one run of ALNUM
+// characters. Returns false when no such run starts at the cursor.
+bool parse_word(struct Stream s, const char **start, ptrdiff_t *len) {
+    spp_skip(s, WS);
+
+    const char *begin = spp_cursor(s);
+    uintptr_t l = spp_skip(s, ALNUM);
+
+    if (l == 0)
+        return false;
+
+    *start = begin;
+    *len = (ptrdiff_t)l;
+    return true;
+}
+
+int main(void) {
+    const char *input = "hello world 123 test";
+    struct Stream s = { &input };
+
+    const char *buf[16];
+    ptrdiff_t len[16];
+
+    uintptr_t n = spp_parse(s, parse_word, buf, len, 16);
+
+    for (uintptr_t i = 0; i < n; i++) {
+        printf("Token: %.*s\n", (int)len[i], buf[i]);
+    }
+
+    // The input holds four words; exit non-zero otherwise so `make test` fails.
+    return n == 4 ? 0 : 1;
+}