Initial, untested implementations.
author ethereal <ethereal@ethv.net>
Sun, 26 Jan 2014 08:24:10 +0000 (01:24 -0700)
committer ethereal <ethereal@ethv.net>
Sun, 26 Jan 2014 08:24:10 +0000 (01:24 -0700)
.gitignore [new file with mode: 0644]
CMakeLists.txt [new file with mode: 0644]
Makefile [new file with mode: 0644]
src/CMakeLists.txt [new file with mode: 0644]
src/main.c [new file with mode: 0644]
src/u8s.c [new file with mode: 0644]
src/u8s.h [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..2d3e039
--- /dev/null
@@ -0,0 +1,5 @@
+/build
+/.vimrc
+
+/u8s_test
+/.ycm_extra_conf.py*
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644 (file)
index 0000000..15e4636
--- /dev/null
@@ -0,0 +1,7 @@
+# Top-level build description: declares the project and hands off to src/.
+cmake_minimum_required(VERSION 2.6)
+
+# C is the only language enabled for this project.
+project(yacjs C)
+
+# Place built executables in the directory containing this file instead of
+# inside the build tree.
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
+
+# The actual targets are defined in src/CMakeLists.txt.
+add_subdirectory(src)
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..0ac7f01
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,16 @@
+# Convenience wrapper around the CMake-generated build in build/.
+# Run `make redep` once (and after changing any CMakeLists.txt) before `make`.
+
+# Build everything using the makefiles CMake generated in build/.
+.PHONY: all
+all:
+       $(MAKE) -C build
+
+# (Re)generate the build tree. The steps are chained with && so that a failed
+# mkdir or cd aborts the rule instead of running cmake in the wrong directory.
+.PHONY: redep
+redep:
+       mkdir -p build && cd build && cmake ..
+
+# Remove build products but keep the generated build tree.
+.PHONY: clean
+clean:
+       $(MAKE) -C build clean
+
+# Remove build products and the build tree itself. The clean step is skipped
+# when build/ does not exist, so distclean also works on a pristine checkout.
+.PHONY: distclean
+distclean:
+       test ! -d build || $(MAKE) -C build clean
+       rm -rf build
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644 (file)
index 0000000..0da1097
--- /dev/null
@@ -0,0 +1,3 @@
+# Pass the warning flags and the C99 language standard to the compiler.
+add_definitions(-Wextra -Wall -std=c99)
+
+# Test executable built from the UTF-8 string routines and the test driver.
+add_executable(u8s_test u8s.c main.c)
diff --git a/src/main.c b/src/main.c
new file mode 100644 (file)
index 0000000..3f845e6
--- /dev/null
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+// Entry point of the u8s_test executable. It does not exercise anything yet
+// and simply reports success.
+int main(void) {
+    return 0;
+}
diff --git a/src/u8s.c b/src/u8s.c
new file mode 100644 (file)
index 0000000..9733fda
--- /dev/null
+++ b/src/u8s.c
@@ -0,0 +1,76 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "u8s.h"
+
+// Returns a pointer to the first byte of the UTF-8 character that follows the
+// character str points at.
+//
+// If str points into the middle of a character (at a continuation byte), the
+// remaining continuation bytes are skipped. For a lead byte, at most the
+// number of continuation bytes announced by that lead byte are skipped, and
+// skipping stops early at the first byte that is not a continuation byte, so
+// a truncated sequence never carries the result past the terminating NUL.
+// Malformed lead bytes (0xfe, 0xff) are treated as one-byte characters.
+//
+// str must not point at the terminating NUL itself: like any other
+// single-byte character it is stepped over, yielding str+1.
+u8s_cptr u8s_next(u8s_cptr str) {
+    // work on an unsigned copy so that the bit tests do not depend on whether
+    // plain char is signed
+    const unsigned char lead = (unsigned char)*str;
+    int remaining;
+
+    // non-extended (ASCII) character, 0x00-0x7f: the top bit is clear
+    if((lead & 0x80) == 0x00) return str+1;
+    // the annoying case, in the middle of a character
+    if((lead & 0xc0) == 0x80) {
+        while(((unsigned char)*str & 0xc0) == 0x80) str ++;
+        return str;
+    }
+    // beginning of extended character; the lead byte tells us how many
+    // continuation bytes are supposed to follow
+    if((lead & 0xe0) == 0xc0) remaining = 1;      // two-byte
+    else if((lead & 0xf0) == 0xe0) remaining = 2; // three-byte
+    else if((lead & 0xf8) == 0xf0) remaining = 3; // four-byte
+    else if((lead & 0xfc) == 0xf8) remaining = 4; // five-byte
+    else if((lead & 0xfe) == 0xfc) remaining = 5; // six-byte
+    // This should never happen, it's a malformed byte.
+    else return str+1;
+
+    // step over the lead byte, then over the continuation bytes that are
+    // actually present
+    str ++;
+    while(remaining > 0 && ((unsigned char)*str & 0xc0) == 0x80) {
+        str ++;
+        remaining --;
+    }
+    return str;
+}
+
+// Counts the characters (not bytes) in the NUL-terminated UTF-8 string str by
+// stepping through it one character at a time with u8s_next().
+size_t u8s_strlen(u8s_cptr str) {
+    size_t count;
+    for(count = 0; *str != 0; count ++) {
+        str = u8s_next(str);
+    }
+    return count;
+}
+
+// Returns the number of bytes in str before the terminating NUL. This is a
+// plain byte count, so the standard strlen() computes exactly the same value.
+size_t u8s_strlenb(u8s_cptr str) {
+    return strlen(str);
+}
+
+// Copies the NUL-terminated string src, including its terminator, into
+// target. target must have room for u8s_strlenb(src)+1 bytes.
+//
+// Returns a pointer to the terminating NUL written into target (not to the
+// start of target), so that further text can be appended there.
+u8s_ptr u8s_strcpy(u8s_ptr target, u8s_cptr src) {
+    // test before copying: an empty source must copy nothing and must not be
+    // read beyond its terminator
+    while(*src != 0) {
+        *(target ++) = *(src ++);
+    }
+    *target = 0;
+    return target;
+}
+
+// Copies as many whole UTF-8 characters from src into target as fit into a
+// buffer of bufsiz bytes, always leaving room for the terminating NUL, which
+// is then written. A character that would only fit partially is not copied.
+//
+// Returns a pointer to the terminating NUL written into target. If bufsiz is
+// zero nothing is written at all and target is returned unchanged.
+u8s_ptr u8s_strncpy(u8s_ptr target, u8s_cptr src, size_t bufsiz) {
+    size_t used = 0;
+    // a zero-sized buffer has no room even for the terminator
+    if(bufsiz == 0) return target;
+    while(*src != 0) {
+        size_t len = (size_t)(u8s_next(src) - src);
+        // stop once this character plus the terminator no longer fits;
+        // used < bufsiz always holds here, so the subtraction cannot wrap
+        if(len >= bufsiz - used) break;
+        memcpy(target, src, len);
+        target += len, src += len, used += len;
+    }
+    *target = 0;
+    return target;
+}
+
+// Compares two NUL-terminated UTF-8 strings byte by byte. The result follows
+// the convention of u8s_strncmp(): negative, zero or positive.
+int u8s_strcmp(u8s_cptr a, u8s_cptr b) {
+    // (size_t)-1 is the largest size_t value, i.e. "no length limit"; an
+    // unsigned int limit would be too small where size_t is wider
+    return u8s_strncmp(a, b, (size_t)-1);
+}
+
+// Compares at most the first bufsiz bytes of a and b. The comparison is done
+// on raw bytes by the standard strncmp(), whose result is passed through
+// unchanged.
+int u8s_strncmp(u8s_cptr a, u8s_cptr b, size_t bufsiz) {
+    const int order = strncmp(a, b, bufsiz);
+    return order;
+}
+
+// Returns a newly malloc()ed copy of the NUL-terminated string s, or NULL if
+// the allocation fails. The caller owns the copy and releases it with free().
+u8s_ptr u8s_strdup(u8s_cptr s) {
+    // the byte length does not include the terminator, so add room for it
+    size_t size = u8s_strlenb(s) + 1;
+    u8s_ptr copy = malloc(size);
+    if(copy == NULL) return NULL;
+    // copies the terminator too; return the start of the allocation so that
+    // the caller can both use and free() the result
+    memcpy(copy, s, size);
+    return copy;
+}
diff --git a/src/u8s.h b/src/u8s.h
new file mode 100644 (file)
index 0000000..d8a9a32
--- /dev/null
+++ b/src/u8s.h
@@ -0,0 +1,30 @@
+#ifndef U8S_H
+#define U8S_H
+
+#include <stddef.h>
+
+/* UTF-8 strings are handled as plain char arrays. These aliases name a
+    writable and a read-only pointer into such a string. */
+typedef char * u8s_ptr;
+typedef const char * u8s_cptr;
+
+/* Get beginning of next UTF-8 character. */
+u8s_cptr u8s_next(u8s_cptr str);
+/* Get length of NUL-terminated UTF-8 string in characters. */
+size_t u8s_strlen(u8s_cptr str);
+/* Get length of NUL-terminated UTF-8 string in bytes, not counting the
+    terminator. */
+size_t u8s_strlenb(u8s_cptr str);
+/* Copy UTF-8 string into target. */
+u8s_ptr u8s_strcpy(u8s_ptr target, u8s_cptr src);
+/* Copy at most bufsiz bytes of the source UTF-8 string into target, respecting
+    UTF-8 character boundaries, and ensuring that the target is
+    NUL-terminated. */
+u8s_ptr u8s_strncpy(u8s_ptr target, u8s_cptr src, size_t bufsiz);
+/* Compare two NUL-terminated UTF-8 strings. */
+int u8s_strcmp(u8s_cptr a, u8s_cptr b);
+/* Compare at most the first bufsiz bytes of two possibly non-NUL-terminated
+    UTF-8 strings. */
+int u8s_strncmp(u8s_cptr a, u8s_cptr b, size_t bufsiz);
+/* Create a copy of a NUL-terminated UTF-8 string with memory allocated via
+    malloc(). */
+u8s_ptr u8s_strdup(u8s_cptr s);
+
+#endif