changeset 0:2d0082216916

libcoding: beginning with a subset of uptools version
author Mychaela Falconia <falcon@freecalypso.org>
date Sat, 05 Aug 2023 00:46:23 +0000
parents
children 13518c86b73c
files .hgignore libcoding/Makefile libcoding/gsm7_encode.c libcoding/gsm7_encode_table.c libcoding/gsm7_pack.c libcoding/ucs2_bigend.c libcoding/utf8_decode.c libcoding/utf8_decode2.c
diffstat 8 files changed, 212 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore	Sat Aug 05 00:46:23 2023 +0000
@@ -0,0 +1,3 @@
+syntax: regexp
+
+\.[oa]$
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/Makefile	Sat Aug 05 00:46:23 2023 +0000
@@ -0,0 +1,14 @@
+CC=	gcc
+CFLAGS=	-O2
+OBJS=	gsm7_encode.o gsm7_encode_table.o gsm7_pack.o ucs2_bigend.o \
+	utf8_decode.o utf8_decode2.o
+LIB=	libcoding.a
+# Default target: build the static library named by LIB.
+all:	${LIB}
+# Archive every object in OBJS into LIB, then run ranlib on the archive.
+${LIB}:	${OBJS}
+	ar rcu $@ ${OBJS}
+	ranlib $@
+# Remove all *.o and *.a files plus a file named "errs" from this directory.
+clean:
+	rm -f *.[oa] errs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/gsm7_encode.c	Sat Aug 05 00:46:23 2023 +0000
@@ -0,0 +1,36 @@
+/*
+ * This library module implements the function for encoding from ISO 8859-1
+ * into the GSM 7-bit default alphabet (03.38 or 23.038).
+ */
+
+#include <sys/types.h>
+
+extern u_char gsm7_encode_table[256];
+
+/*
+ * Encode the NUL-terminated ISO 8859-1 string inbuf as GSM 7-bit septets,
+ * one per outbuf byte (escaped characters: 0x1B plus a second septet).
+ * Returns 0 with the septet count stored via outlenp, -1 on an unencodable
+ * character, or -2 if more than outmax septets would be needed.
+ */
+int latin1_to_gsm7(u_char *inbuf, u_char *outbuf, unsigned outmax,
+		unsigned *outlenp)
+{
+	u_char *ip = inbuf, *op = outbuf;
+	unsigned outcnt = 0, c, n;
+
+	while ((c = *ip++) != 0) {
+		c = gsm7_encode_table[c];
+		if (c == 0xFF)		/* no GSM7 equivalent */
+			return(-1);
+		n = (c & 0x80) ? 2 : 1;	/* high bit marks an escape pair */
+		if (outcnt + n > outmax)
+			return(-2);
+		if (n == 2)
+			*op++ = 0x1B;
+		*op++ = c & 0x7F;
+		outcnt += n;
+	}
+	*outlenp = outcnt;
+	return(0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/gsm7_encode_table.c	Sat Aug 05 00:46:23 2023 +0000
@@ -0,0 +1,43 @@
+/*
+ * This library module contains the table for encoding from ISO 8859-1
+ * into the GSM 7-bit default alphabet (03.38 or 23.038).  High bit set
+ * in the output indicates escape encoding, used for ASCII characters
+ * [\]^ and {|}~.  0xFF indicates invalid chars.
+ */
+
+#include <sys/types.h>
+
+u_char gsm7_encode_table[256] = {
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0x00 */
+	0xFF, 0xFF, '\n', 0xFF, 0xFF, '\r', 0xFF, 0xFF,	/* 0x08 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0x10 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0x18 */
+	' ',  '!',  '"',  '#',  0x02, '%',  '&',  0x27,	/* 0x20 */
+	'(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',	/* 0x28 */
+	'0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',	/* 0x30 */
+	'8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',	/* 0x38 */
+	0x00, 'A',  'B',  'C',  'D',  'E',  'F',  'G',	/* 0x40 */
+	'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',	/* 0x48 */
+	'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',	/* 0x50 */
+	'X',  'Y',  'Z',  0xBC, 0xAF, 0xBE, 0x94, 0x11,	/* 0x58 */
+	0xFF, 'a',  'b',  'c',  'd',  'e',  'f',  'g',	/* 0x60 */
+	'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',	/* 0x68 */
+	'p',  'q',  'r',  's',  't',  'u',  'v',  'w',	/* 0x70 */
+	'x',  'y',  'z',  0xA8, 0xC0, 0xA9, 0xBD, 0xFF,	/* 0x78 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0x80 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0x88 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0x90 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0x98 */
+	0xFF, 0x40, 0xFF, 0x01, 0x24, 0x03, 0xFF, 0x5F,	/* 0xA0 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0xA8 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0xB0 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x60,	/* 0xB8 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x5B, 0x0E, 0x1C, 0x09,	/* 0xC0 */
+	0xFF, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0xC8 */
+	0xFF, 0x5D, 0xFF, 0xFF, 0xFF, 0xFF, 0x5C, 0xFF,	/* 0xD0 */
+	0x0B, 0xFF, 0xFF, 0xFF, 0x5E, 0xFF, 0xFF, 0x1E,	/* 0xD8 */
+	0x7F, 0xFF, 0xFF, 0xFF, 0x7B, 0x0F, 0x1D, 0xFF,	/* 0xE0 */
+	0x04, 0x05, 0xFF, 0xFF, 0x07, 0xFF, 0xFF, 0xFF,	/* 0xE8 */
+	0xFF, 0x7D, 0x08, 0xFF, 0xFF, 0xFF, 0x7C, 0xFF,	/* 0xF0 */
+	0x0C, 0x06, 0xFF, 0xFF, 0x7E, 0xFF, 0xFF, 0xFF	/* 0xF8 */
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/gsm7_pack.c	Sat Aug 05 00:46:23 2023 +0000
@@ -0,0 +1,22 @@
+/*
+ * This library module implements the function for packing septets into octets.
+ */
+
+#include <sys/types.h>
+
+/*
+ * Pack septets (one per inbuf byte, bit 7 assumed clear) into noctets octets
+ * at outbuf and return 0.  Each octet is built from ip[0] and ip[1], so the
+ * byte after the last septet is read too: it must be valid (ideally zero).
+ */
+int gsm7_pack(u_char *inbuf, u_char *outbuf, unsigned noctets)
+{
+	u_char *ip = inbuf, *op = outbuf;
+	unsigned n, c;
+	for (n = 0; n < noctets; n++) {
+		c = n % 7;		/* bits of ip[0] already emitted */
+		*op++ = ((ip[1] << 7) | ip[0]) >> c;
+		ip += (c == 6) ? 2 : 1;	/* 7th octet uses up two septets */
+	}
+	return(0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/ucs2_bigend.c	Sat Aug 05 00:46:23 2023 +0000
@@ -0,0 +1,22 @@
+/*
+ * This library module implements the function for converting a UCS-2
+ * string from native 16-bit words into a byte buffer for SMS.
+ */
+
+#include <sys/types.h>
+
+/*
+ * Store nuni UCS-2 words from inbuf (native u_short values) into outbuf as
+ * 2*nuni bytes, high-order byte first.  Always returns 0.
+ */
+int ucs2_out_bigend(u_short *inbuf, u_char *outbuf, unsigned nuni)
+{
+	u_char *op = outbuf;
+	unsigned n;
+
+	for (n = 0; n < nuni; n++) {
+		*op++ = inbuf[n] >> 8;
+		*op++ = inbuf[n] & 0xFF;
+	}
+	return(0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/utf8_decode.c	Sat Aug 05 00:46:23 2023 +0000
@@ -0,0 +1,29 @@
+/*
+ * This library module implements a function that converts text input
+ * from UTF-8 to ISO 8859-1, rejecting any input Unicode characters
+ * that aren't in the 8859-1 range.  The conversion is done in place.
+ */
+
+#include <sys/types.h>
+
+/* Convert NUL-terminated UTF-8 in buf to ISO 8859-1 in place; returns 0, or
+ * -1 on malformed input or code points above U+00FF (buf partly converted). */
+int utf8_to_latin1(u_char *buf)
+{
+	u_char *ip = buf, *op = buf;
+	int c, c2;
+
+	while ((c = *ip++) != 0) {
+		if (c >= 0x80) {
+			if (c != 0xC2 && c != 0xC3)
+				return(-1);	/* not a U+0080..U+00FF lead */
+			c2 = *ip++;
+			if (c2 < 0x80 || c2 > 0xBF)
+				return(-1);	/* bad continuation byte */
+			c = ((c & 3) << 6) | (c2 & 0x3F);
+		}
+		*op++ = c;
+	}
+	*op = '\0';
+	return(0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/utf8_decode2.c	Sat Aug 05 00:46:23 2023 +0000
@@ -0,0 +1,43 @@
+/*
+ * This library module implements the function for converting UTF-8 input
+ * to UCS-2 in outgoing SMS composition.
+ */
+
+#include <sys/types.h>
+
+/*
+ * Decode the NUL-terminated UTF-8 string inbuf into native UCS-2 words at
+ * outbuf (at most outmax of them), storing the word count via outlenp.
+ * Returns 0, -1 on malformed input (stray/bad continuation byte, 4-byte
+ * sequence, overlong form) or -2 on output overflow; no count on failure.
+ */
+int utf8_to_ucs2(u_char *inbuf, u_short *outbuf, unsigned outmax,
+		unsigned *outlenp)
+{
+	u_char *ip = inbuf;
+	u_short *op = outbuf;
+	unsigned outcnt = 0, c, n, uni, min;
+	while ((c = *ip++) != 0) {
+		uni = c;
+		if (c >= 0x80) {
+			if (c < 0xC0 || c > 0xEF)
+				return(-1);
+			n = (c >= 0xE0) ? 2 : 1;	/* continuation bytes */
+			min = (n == 2) ? 0x800 : 0x80;	/* smallest legal value */
+			for (uni = c & 0x1F; n; n--) {
+				c = *ip++;
+				if (c < 0x80 || c > 0xBF)
+					return(-1);
+				uni = (uni << 6) | (c & 0x3F);
+			}
+			if (uni < min)	/* overlong form, e.g. C0 80 for NUL */
+				return(-1);
+		}
+		if (outcnt >= outmax)
+			return(-2);
+		*op++ = uni;
+		outcnt++;
+	}
+	*outlenp = outcnt;
+	return(0);
+}