diff uptools/libcoding/ucs2_decode.c @ 802:1c599681fd60

pcm-sms-decode & sms-pdu-decode: revamp bad char decoding
author Mychaela Falconia <falcon@freecalypso.org>
date Thu, 25 Mar 2021 02:58:30 +0000
parents 978571e23318
children 30fbaa652ea5
line wrap: on
line diff
--- a/uptools/libcoding/ucs2_decode.c	Thu Mar 25 01:40:36 2021 +0000
+++ b/uptools/libcoding/ucs2_decode.c	Thu Mar 25 02:58:30 2021 +0000
@@ -6,36 +6,51 @@
  */
 
 #include <sys/types.h>
+#include <stdio.h>
 
 ucs2_to_ascii_or_ext(inbuf, inlen, outbuf, outlenp, ascii_ext, newline_ok, errp)
 	u_char *inbuf, *outbuf;
 	unsigned inlen, *outlenp, *errp;
 {
 	u_char *inp, *endp, *outp;
-	unsigned errcnt = 0;
 	unsigned uni;
 
 	inp = inbuf;
 	endp = inbuf + (inlen & ~1);
 	outp = outbuf;
 	while (inp < endp) {
+		if ((endp - inp) >= 4 && (inp[0] & 0xFC) == 0xD8 &&
+		    (inp[2] & 0xFC) == 0xDC) {
+			uni = ((inp[0] & 3) << 18) | (inp[1] << 10) |
+			      ((inp[2] & 3) << 8) | inp[3];
+			inp += 4;
+			uni += 0x10000;
+			if (ascii_ext == 2)
+				outp += emit_utf8_char(uni, outp);
+			else {
+				sprintf(outp, "\\U%06X", uni);
+				outp += 8;
+			}
+			continue;
+		}
 		uni = (inp[0] << 8) | inp[1];
 		inp += 2;
-		if (uni == '\r') {
+		if (uni == '\\') {
+			*outp++ = '\\';
+			*outp++ = '\\';
+		} else if (uni == '\r') {
 			*outp++ = '\\';
 			*outp++ = 'r';
-			errcnt++;
 		} else if (uni == '\n') {
 			if (newline_ok)
 				*outp++ = '\n';
 			else {
 				*outp++ = '\\';
 				*outp++ = 'n';
-				errcnt++;
 			}
 		} else if (!is_decoded_char_ok(uni, ascii_ext)) {
-			*outp++ = '?';
-			errcnt++;
+			sprintf(outp, "\\u%04X", uni);
+			outp += 6;
 		} else if (ascii_ext == 2)
 			outp += emit_utf8_char(uni, outp);
 		else
@@ -45,5 +60,5 @@
 	if (outlenp)
 		*outlenp = outp - outbuf;
 	if (errp)
-		*errp = errcnt;
+		*errp = 0;
 }