changeset 802:1c599681fd60

pcm-sms-decode & sms-pdu-decode: revamp bad char decoding
author Mychaela Falconia <falcon@freecalypso.org>
date Thu, 25 Mar 2021 02:58:30 +0000
parents da724c67159d
children 5637794913a8
files uptools/libcoding/decode_helpers.c uptools/libcoding/gsm7_decode.c uptools/libcoding/ucs2_decode.c uptools/sms-pdu-decode/pdu-common.c
diffstat 4 files changed, 73 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/uptools/libcoding/decode_helpers.c	Thu Mar 25 01:40:36 2021 +0000
+++ b/uptools/libcoding/decode_helpers.c	Thu Mar 25 02:58:30 2021 +0000
@@ -48,8 +48,15 @@
 		outp[1] = 0x80 | (uni & 0x3F);
 		return(2);
 	}
-	outp[0] = 0xE0 | (uni >> 12);
-	outp[1] = 0x80 | ((uni >> 6) & 0x3F);
-	outp[2] = 0x80 | (uni & 0x3F);
-	return(3);
+	if (uni < 0x10000) {
+		outp[0] = 0xE0 | (uni >> 12);
+		outp[1] = 0x80 | ((uni >> 6) & 0x3F);
+		outp[2] = 0x80 | (uni & 0x3F);
+		return(3);
+	}
+	outp[0] = 0xF0 | (uni >> 18);
+	outp[1] = 0x80 | ((uni >> 12) & 0x3F);
+	outp[2] = 0x80 | ((uni >> 6) & 0x3F);
+	outp[3] = 0x80 | (uni & 0x3F);
+	return(4);
 }
--- a/uptools/libcoding/gsm7_decode.c	Thu Mar 25 01:40:36 2021 +0000
+++ b/uptools/libcoding/gsm7_decode.c	Thu Mar 25 02:58:30 2021 +0000
@@ -4,6 +4,7 @@
  */
 
 #include <sys/types.h>
+#include <stdio.h>
 
 extern u_short gsm7_decode_table[128];
 extern u_short gsm7ext_decode_table[128];
@@ -13,33 +14,53 @@
 	unsigned inlen, *outlenp, *errp;
 {
 	u_char *inp, *endp, *outp;
-	unsigned errcnt = 0;
 	unsigned gsm, uni;
+	int is_ext;
 
 	inp = inbuf;
 	endp = inbuf + inlen;
 	outp = outbuf;
 	while (inp < endp) {
 		gsm = *inp++;
-		if (gsm == 0x1B && inp < endp)
-			uni = gsm7ext_decode_table[*inp++];
-		else
+		if (gsm == 0x1B && inp < endp && *inp != 0x1B && *inp != '\n'
+		    && *inp != '\r') {
+			gsm = *inp++;
+			uni = gsm7ext_decode_table[gsm];
+			if (uni == '\\') {
+				*outp++ = '\\';
+				*outp++ = '\\';
+				continue;
+			}
+			is_ext = 1;
+		} else {
+			switch (gsm) {
+			case 0x1B:
+				*outp++ = '\\';
+				*outp++ = 'e';
+				continue;
+			case '\n':
+				if (newline_ok)
+					*outp++ = '\n';
+				else {
+					*outp++ = '\\';
+					*outp++ = 'n';
+				}
+				continue;
+			case '\r':
+				*outp++ = '\\';
+				*outp++ = 'r';
+				continue;
+			}
 			uni = gsm7_decode_table[gsm];
-		if (uni == '\r') {
-			*outp++ = '\\';
-			*outp++ = 'r';
-			errcnt++;
-		} else if (uni == '\n') {
-			if (newline_ok)
-				*outp++ = '\n';
-			else {
+			is_ext = 0;
+		}
+		if (!uni || !is_decoded_char_ok(uni, ascii_ext)) {
+			if (is_ext) {
 				*outp++ = '\\';
-				*outp++ = 'n';
-				errcnt++;
+				*outp++ = 'e';
 			}
-		} else if (!uni || !is_decoded_char_ok(uni, ascii_ext)) {
-			*outp++ = '?';
-			errcnt++;
+			sprintf(outp, "\\%02X", gsm);
+			outp += 3;
 		} else if (ascii_ext == 2)
 			outp += emit_utf8_char(uni, outp);
 		else
@@ -49,5 +70,5 @@
 	if (outlenp)
 		*outlenp = outp - outbuf;
 	if (errp)
-		*errp = errcnt;
+		*errp = 0;
 }
--- a/uptools/libcoding/ucs2_decode.c	Thu Mar 25 01:40:36 2021 +0000
+++ b/uptools/libcoding/ucs2_decode.c	Thu Mar 25 02:58:30 2021 +0000
@@ -6,36 +6,51 @@
  */
 
 #include <sys/types.h>
+#include <stdio.h>
 
 ucs2_to_ascii_or_ext(inbuf, inlen, outbuf, outlenp, ascii_ext, newline_ok, errp)
 	u_char *inbuf, *outbuf;
 	unsigned inlen, *outlenp, *errp;
 {
 	u_char *inp, *endp, *outp;
-	unsigned errcnt = 0;
 	unsigned uni;
 
 	inp = inbuf;
 	endp = inbuf + (inlen & ~1);
 	outp = outbuf;
 	while (inp < endp) {
+		if ((endp - inp) >= 4 && (inp[0] & 0xFC) == 0xD8 &&
+		    (inp[2] & 0xFC) == 0xDC) {
+			uni = ((inp[0] & 3) << 18) | (inp[1] << 10) |
+			      ((inp[2] & 3) << 8) | inp[3];
+			inp += 4;
+			uni += 0x10000;
+			if (ascii_ext == 2)
+				outp += emit_utf8_char(uni, outp);
+			else {
+				sprintf(outp, "\\U%06X", uni);
+				outp += 8;
+			}
+			continue;
+		}
 		uni = (inp[0] << 8) | inp[1];
 		inp += 2;
-		if (uni == '\r') {
+		if (uni == '\\') {
+			*outp++ = '\\';
+			*outp++ = '\\';
+		} else if (uni == '\r') {
 			*outp++ = '\\';
 			*outp++ = 'r';
-			errcnt++;
 		} else if (uni == '\n') {
 			if (newline_ok)
 				*outp++ = '\n';
 			else {
 				*outp++ = '\\';
 				*outp++ = 'n';
-				errcnt++;
 			}
 		} else if (!is_decoded_char_ok(uni, ascii_ext)) {
-			*outp++ = '?';
-			errcnt++;
+			sprintf(outp, "\\u%04X", uni);
+			outp += 6;
 		} else if (ascii_ext == 2)
 			outp += emit_utf8_char(uni, outp);
 		else
@@ -45,5 +60,5 @@
 	if (outlenp)
 		*outlenp = outp - outbuf;
 	if (errp)
-		*errp = errcnt;
+		*errp = 0;
 }
--- a/uptools/sms-pdu-decode/pdu-common.c	Thu Mar 25 01:40:36 2021 +0000
+++ b/uptools/sms-pdu-decode/pdu-common.c	Thu Mar 25 02:58:30 2021 +0000
@@ -250,7 +250,7 @@
 {
 	unsigned udl, udl_octets;
 	unsigned udhl, udh_octets, udh_chars, ud_chars;
-	u_char ud7[160], decode_buf[321];
+	u_char ud7[160], decode_buf[481];
 	int do_hexdump;
 	unsigned decoded_len, badchars;