changeset 328:978571e23318

uptools started with libcoding
author Mychaela Falconia <falcon@freecalypso.org>
date Sat, 03 Feb 2018 20:07:05 +0000
parents 973d885a68a0
children 18c692984549
files uptools/libcoding/Makefile uptools/libcoding/decode_helpers.c uptools/libcoding/gsm7_decode.c uptools/libcoding/gsm7_decode_tables.c uptools/libcoding/ucs2_decode.c
diffstat 5 files changed, 206 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uptools/libcoding/Makefile	Sat Feb 03 20:07:05 2018 +0000
@@ -0,0 +1,13 @@
+CC=	gcc
+CFLAGS=	-O2
+OBJS=	decode_helpers.o gsm7_decode.o gsm7_decode_tables.o ucs2_decode.o
+LIB=	libcoding.a
+
+.PHONY:	all clean
+all:	${LIB}
+
+# ar's 's' modifier writes the symbol index, replacing the ranlib step
+${LIB}:	${OBJS}
+	ar rcs $@ ${OBJS}
+clean:
+	rm -f *.[oa] errs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uptools/libcoding/decode_helpers.c	Sat Feb 03 20:07:05 2018 +0000
@@ -0,0 +1,55 @@
+/*
+ * This library module implements the is_decoded_char_ok() and emit_utf8_char()
+ * functions used by gsm7_to_ascii_or_ext() and ucs2_to_ascii_or_ext().
+ */
+
+#include <sys/types.h>
+
+/*
+ * Decide whether the decoded Unicode code point uni may be written to
+ * the output as-is.
+ *
+ * ascii_ext selects the output encoding: 0 = 7-bit ASCII, 1 = ISO 8859-1,
+ * 2 = UTF-8 (BMP only); any other value rejects every character.
+ *
+ * Returns 1 if the character is acceptable, 0 if the caller has to
+ * substitute something else for it.
+ */
+int is_decoded_char_ok(unsigned uni, int ascii_ext)
+{
+	unsigned upper_limit;
+
+	/* C0 controls, DEL and C1 controls are never passed through */
+	if (uni < 0x20 || (uni >= 0x7F && uni <= 0x9F))
+		return(0);
+	/* a lone UTF-16 surrogate is not a character; encoding one on its
+	 * own would yield invalid UTF-8 */
+	if (uni >= 0xD800 && uni <= 0xDFFF)
+		return(0);
+	switch (ascii_ext) {
+	case 0:  upper_limit = 0x7F;   break;
+	case 1:  upper_limit = 0xFF;   break;
+	case 2:  upper_limit = 0xFFFF; break;
+	default: upper_limit = 0;
+	}
+	return(uni <= upper_limit);
+}
+
+/*
+ * Encode the code point uni (at most U+10FFFF) as UTF-8 at outp and
+ * return the number of bytes written: 1 to 3 for the BMP, 4 above it.
+ * No terminating NUL is stored and outp is not bounds-checked.
+ */
+int emit_utf8_char(unsigned uni, unsigned char *outp)
+{
+	/* lead-byte marker bits for each sequence length (index = length) */
+	static const unsigned char lead[5] = {0, 0, 0xC0, 0xE0, 0xF0};
+	int len, i;
+
+	len = uni < 0x80 ? 1 : uni < 0x800 ? 2 : uni < 0x10000 ? 3 : 4;
+	/* continuation bytes hold 6 payload bits each, filled back to front */
+	for (i = len - 1; i > 0; i--, uni >>= 6)
+		outp[i] = 0x80 | (uni & 0x3F);
+	outp[0] = lead[len] | uni;	/* the bits left over go in the lead byte */
+	return(len);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uptools/libcoding/gsm7_decode.c	Sat Feb 03 20:07:05 2018 +0000
@@ -0,0 +1,53 @@
+/*
+ * This library module implements the decoding of GSM7-encoded data
+ * into ASCII, ISO 8859-1 or UTF-8.
+ */
+
+#include <sys/types.h>
+
+extern unsigned short gsm7_decode_table[128];
+extern unsigned short gsm7ext_decode_table[128];
+extern int is_decoded_char_ok(unsigned uni, int ascii_ext);
+extern int emit_utf8_char(unsigned uni, unsigned char *outp);
+
+/*
+ * Decode inlen unpacked GSM7 septets (one per byte) into a NUL-terminated
+ * string at outbuf (not bounds-checked).  ascii_ext: 0 = ASCII, 1 = ISO
+ * 8859-1, 2 = UTF-8.  CR, LF unless newline_ok, and undecodable input are
+ * written as \r, \n and ? and counted in *errp; *outlenp gets the length.
+ */
+int gsm7_to_ascii_or_ext(unsigned char *inbuf, unsigned inlen,
+			 unsigned char *outbuf, unsigned *outlenp,
+			 int ascii_ext, int newline_ok, unsigned *errp)
+{
+	unsigned char *inp = inbuf, *endp = inbuf + inlen, *outp = outbuf;
+	unsigned errcnt = 0, gsm, uni;
+
+	while (inp < endp) {
+		gsm = *inp++;
+		if (gsm == 0x1B && inp < endp) {	/* escape prefix */
+			gsm = *inp++;
+			uni = gsm < 128 ? gsm7ext_decode_table[gsm] : 0;
+		} else		/* bytes above 0x7F would index past the tables */
+			uni = gsm < 128 ? gsm7_decode_table[gsm] : 0;
+		if (uni == '\r' || (uni == '\n' && !newline_ok)) {
+			*outp++ = '\\';
+			*outp++ = uni == '\r' ? 'r' : 'n';
+			errcnt++;
+		} else if (uni == '\n')
+			*outp++ = '\n';
+		else if (!uni || !is_decoded_char_ok(uni, ascii_ext)) {
+			*outp++ = '?';
+			errcnt++;
+		} else if (ascii_ext == 2)
+			outp += emit_utf8_char(uni, outp);
+		else
+			*outp++ = uni;
+	}
+	*outp = '\0';
+	if (outlenp)		/* both result pointers are optional */
+		*outlenp = outp - outbuf;
+	if (errp)
+		*errp = errcnt;
+	return(0);		/* no failure modes; callers may ignore this */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uptools/libcoding/gsm7_decode_tables.c	Sat Feb 03 20:07:05 2018 +0000
@@ -0,0 +1,36 @@
+/*
+ * This library module contains the tables for decoding the GSM 7-bit
+ * default alphabet (03.38 or 23.038) into Unicode.
+ */
+
+#include <sys/types.h>
+
+unsigned short gsm7_decode_table[128] = { /* GSM septet -> Unicode; 0 = none */
+	'@',    0x00A3, '$',    0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC,	/* 0x00 */
+	0x00F2, 0x00C7, '\n',   0x00D8, 0x00F8, '\r',   0x00C5, 0x00E5,	/* 0x08 */
+	0x0394, '_',    0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8,	/* 0x10 */
+	0x03A3, 0x0398, 0x039E, 0,      0x00C6, 0x00E6, 0x00DF, 0x00C9,	/* 0x18 */
+	' ',    '!',    '"',    '#',    0x00A4, '%',    '&',    '\'',	/* 0x20 */
+	'(',    ')',    '*',    '+',    ',',    '-',    '.',    '/',	/* 0x28 */
+	'0',    '1',    '2',    '3',    '4',    '5',    '6',    '7',	/* 0x30 */
+	'8',    '9',    ':',    ';',    '<',    '=',    '>',    '?',	/* 0x38 */
+	0x00A1, 'A',    'B',    'C',    'D',    'E',    'F',    'G',	/* 0x40 */
+	'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O',	/* 0x48 */
+	'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W',	/* 0x50 */
+	'X',    'Y',    'Z',    0x00C4, 0x00D6, 0x00D1, 0x00DC, 0x00A7,	/* 0x58 */
+	0x00BF, 'a',    'b',    'c',    'd',    'e',    'f',    'g',	/* 0x60 */
+	'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',	/* 0x68 */
+	'p',    'q',    'r',    's',    't',    'u',    'v',    'w',	/* 0x70 */
+	'x',    'y',    'z',    0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0	/* 0x78 */
+};
+
+/*
+ * Extension table, indexed by the septet that follows the 0x1B escape;
+ * entries left out of the initializer are 0, meaning "no mapping".
+ */
+unsigned short gsm7ext_decode_table[128] = {
+	[0x0A] = '\n',  [0x0D] = '\r',  [0x14] = '^',
+	[0x28] = '{',   [0x29] = '}',   [0x2F] = '\\',
+	[0x3C] = '[',   [0x3D] = '~',   [0x3E] = ']',
+	[0x40] = '|',   [0x65] = 0x20AC		/* euro sign */
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uptools/libcoding/ucs2_decode.c	Sat Feb 03 20:07:05 2018 +0000
@@ -0,0 +1,49 @@
+/*
+ * This library module implements the conversion of UCS2-encoded data
+ * (typically received in SMS) into ASCII, ISO 8859-1 or UTF-8,
+ * maintaining parallelism with the corresponding function for decoding
+ * GSM7-encoded data.
+ */
+
+#include <sys/types.h>
+
+extern int is_decoded_char_ok(unsigned uni, int ascii_ext);
+extern int emit_utf8_char(unsigned uni, unsigned char *outp);
+
+/*
+ * Decode inlen bytes of big-endian UCS-2 at inbuf into a NUL-terminated
+ * string at outbuf (not bounds-checked); an odd trailing byte is ignored.
+ * ascii_ext: 0 = ASCII, 1 = ISO 8859-1, 2 = UTF-8.  CR, LF unless
+ * newline_ok, and unrepresentable characters are written as \r, \n and ?
+ * and counted in *errp; *outlenp gets the output length.
+ */
+int ucs2_to_ascii_or_ext(unsigned char *inbuf, unsigned inlen,
+			 unsigned char *outbuf, unsigned *outlenp,
+			 int ascii_ext, int newline_ok, unsigned *errp)
+{
+	unsigned char *inp, *endp = inbuf + (inlen & ~1u), *outp = outbuf;
+	unsigned errcnt = 0, uni;
+
+	for (inp = inbuf; inp < endp; inp += 2) {
+		uni = (inp[0] << 8) | inp[1];
+		if (uni == '\r' || (uni == '\n' && !newline_ok)) {
+			*outp++ = '\\';
+			*outp++ = uni == '\r' ? 'r' : 'n';
+			errcnt++;
+		} else if (uni == '\n')
+			*outp++ = '\n';
+		else if (!is_decoded_char_ok(uni, ascii_ext)) {
+			*outp++ = '?';
+			errcnt++;
+		} else if (ascii_ext == 2)
+			outp += emit_utf8_char(uni, outp);
+		else
+			*outp++ = uni;
+	}
+	*outp = '\0';
+	if (outlenp)		/* both result pointers are optional */
+		*outlenp = outp - outbuf;
+	if (errp)
+		*errp = errcnt;
+	return(0);		/* no failure modes; callers may ignore this */
+}