comparison uptools/libcoding/ucs2_decode.c @ 802:1c599681fd60

pcm-sms-decode & sms-pdu-decode: revamp bad char decoding
author Mychaela Falconia <falcon@freecalypso.org>
date Thu, 25 Mar 2021 02:58:30 +0000
parents 978571e23318
children 30fbaa652ea5
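comparison (side by side: left column is the old revision, right column is the new revision)
legend: equal | deleted | inserted | replaced
left: 801:da724c67159d  right: 802:1c599681fd60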
4 * maintaining parallelism with the corresponding function for decoding 4 * maintaining parallelism with the corresponding function for decoding
5 * GSM7-encoded data. 5 * GSM7-encoded data.
6 */ 6 */
7 7
8 #include <sys/types.h> 8 #include <sys/types.h>
9 #include <stdio.h>
9 10
10 ucs2_to_ascii_or_ext(inbuf, inlen, outbuf, outlenp, ascii_ext, newline_ok, errp) 11 ucs2_to_ascii_or_ext(inbuf, inlen, outbuf, outlenp, ascii_ext, newline_ok, errp)
11 u_char *inbuf, *outbuf; 12 u_char *inbuf, *outbuf;
12 unsigned inlen, *outlenp, *errp; 13 unsigned inlen, *outlenp, *errp;
13 { 14 {
14 u_char *inp, *endp, *outp; 15 u_char *inp, *endp, *outp;
15 unsigned errcnt = 0;
16 unsigned uni; 16 unsigned uni;
17 17
18 inp = inbuf; 18 inp = inbuf;
19 endp = inbuf + (inlen & ~1); 19 endp = inbuf + (inlen & ~1);
20 outp = outbuf; 20 outp = outbuf;
21 while (inp < endp) { 21 while (inp < endp) {
22 if ((endp - inp) >= 4 && (inp[0] & 0xFC) == 0xD8 &&
23 (inp[2] & 0xFC) == 0xDC) {
24 uni = ((inp[0] & 3) << 18) | (inp[1] << 10) |
25 ((inp[2] & 3) << 8) | inp[3];
26 inp += 4;
27 uni += 0x10000;
28 if (ascii_ext == 2)
29 outp += emit_utf8_char(uni, outp);
30 else {
31 sprintf(outp, "\\U%06X", uni);
32 outp += 8;
33 }
34 continue;
35 }
22 uni = (inp[0] << 8) | inp[1]; 36 uni = (inp[0] << 8) | inp[1];
23 inp += 2; 37 inp += 2;
24 if (uni == '\r') { 38 if (uni == '\\') {
39 *outp++ = '\\';
40 *outp++ = '\\';
41 } else if (uni == '\r') {
25 *outp++ = '\\'; 42 *outp++ = '\\';
26 *outp++ = 'r'; 43 *outp++ = 'r';
27 errcnt++;
28 } else if (uni == '\n') { 44 } else if (uni == '\n') {
29 if (newline_ok) 45 if (newline_ok)
30 *outp++ = '\n'; 46 *outp++ = '\n';
31 else { 47 else {
32 *outp++ = '\\'; 48 *outp++ = '\\';
33 *outp++ = 'n'; 49 *outp++ = 'n';
34 errcnt++;
35 } 50 }
36 } else if (!is_decoded_char_ok(uni, ascii_ext)) { 51 } else if (!is_decoded_char_ok(uni, ascii_ext)) {
37 *outp++ = '?'; 52 sprintf(outp, "\\u%04X", uni);
38 errcnt++; 53 outp += 6;
39 } else if (ascii_ext == 2) 54 } else if (ascii_ext == 2)
40 outp += emit_utf8_char(uni, outp); 55 outp += emit_utf8_char(uni, outp);
41 else 56 else
42 *outp++ = uni; 57 *outp++ = uni;
43 } 58 }
44 *outp = '\0'; 59 *outp = '\0';
45 if (outlenp) 60 if (outlenp)
46 *outlenp = outp - outbuf; 61 *outlenp = outp - outbuf;
47 if (errp) 62 if (errp)
48 *errp = errcnt; 63 *errp = 0;
49 } 64 }