From b533686218da2e0b6cb59151c130a39dc08a1f64 Mon Sep 17 00:00:00 2001 From: "R. Steve McKown" Date: Tue, 18 May 2010 12:27:13 -0600 Subject: [PATCH] More generic unicode support, no redefinitions of USB headers, no USBSTRLEN. --- src/cp210x.c | 134 +++++++++++++++++++++++++++++++------------- src/cp210x.c.karmic | 134 +++++++++++++++++++++++++++++++------------- 2 files changed, 189 insertions(+), 79 deletions(-) diff --git a/src/cp210x.c b/src/cp210x.c index 8c39dd1..6576939 100644 --- a/src/cp210x.c +++ b/src/cp210x.c @@ -203,39 +203,94 @@ static struct usb_serial_driver cp210x_device = { #define CP210x_PART_CP2102 0x02 #define CP210x_PART_CP2103 0x03 -/* Return the size of the buffer needed to hold a string of len x formatted - * for send to CP210X, and its reverse. - */ -#define USBSTRLEN(strlen) (strlen * 2 + 2) +/* Taken from drivers/usb/gadget/usbstring.c */ +static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len) +{ + int count = 0; + u8 c; + u16 uchar; + + /* this insists on correct encodings, though not minimal ones. + * BUT it currently rejects legit 4-byte UTF-8 code points, + * which need surrogate pairs. (Unicode 3.1 can use them.) + */ + while (len != 0 && (c = (u8) *s++) != 0) { + if (unlikely(c & 0x80)) { + // 2-byte sequence: + // 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx + if ((c & 0xe0) == 0xc0) { + uchar = (c & 0x1f) << 6; + + c = (u8) *s++; + if ((c & 0xc0) != 0x80) + goto fail; + c &= 0x3f; + uchar |= c; + + // 3-byte sequence (most CJKV characters): + // zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx + } else if ((c & 0xf0) == 0xe0) { + uchar = (c & 0x0f) << 12; + + c = (u8) *s++; + if ((c & 0xc0) != 0x80) + goto fail; + c &= 0x3f; + uchar |= c << 6; + + c = (u8) *s++; + if ((c & 0xc0) != 0x80) + goto fail; + c &= 0x3f; + uchar |= c; + + /* no bogus surrogates */ + if (0xd800 <= uchar && uchar <= 0xdfff) + goto fail; + + // 4-byte sequence (surrogate pairs, currently rare): + // 11101110wwwwzzzzyy + 110111yyyyxxxxxx + // = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx + // (uuuuu = wwww + 1) + // FIXME accept the surrogate code points (only) + + } else + goto fail; + } else + uchar = c; + *cp++ = uchar; + count++; + len--; + } + return count; +fail: + return -1; +} -/* Populates usbstr with: (len) + (0x03) + unicode(str). Each char in str - * takes two bytes in unicode format. - * Returns the resulting length of the string in usbstr. + +/* + * make_usb_string + * Populates usbstr with: (len) + (USB_DT_STRING) + unicode(str). Each char in + * usbstr consumes two bytes in unicode (__le16) format. + * Returns the resulting length of usbstr in bytes. * This function can accept overlapping usbstr and str as long as the overlap * does not cause data written to usbstr to overwrite data not yet read from * str. */ -static int make_usb_string(char *usbstr, size_t usblen, char *src, +static int make_usb_string(__le16* usbstr, size_t usblen, const char *src, size_t srclen) { - int len = 0; - - if (usbstr && usblen >= 2 && src && *src && srclen) { - char *p; - - if (usblen > 255) - usblen = 255; - - p = usbstr + 1; - *p++ = 0x03; - len = 2; - while (srclen && len < usblen) { - *p++ = *src++; - *p++ = 0; - len += 2; - srclen--; - } - *usbstr = (char)len; + int len; + + /* string descriptors have length, type, then UTF16-LE text */ + if (usblen > 254) + usblen = 254; + len = min(usblen / 2 - 1, srclen); + len = utf8_to_utf16le(src, usbstr + 1, len); + if (len >= 0) { + len = (len + 1) * 2; + ((u8 *)usbstr)[0] = len; + ((u8 *)usbstr)[1] = USB_DT_STRING; } return len; } @@ -246,7 +301,7 @@ static int make_usb_string(char *usbstr, size_t usblen, char *src, * is the size of the buffer at kbuf. * Returns the number of bytes used in kbuf. */ -static size_t cp210x_usbstr_from_user(char *kbuf, +static size_t cp210x_usbstr_from_user(__le16 *kbuf, struct cp210x_buffer __user *ubuf, size_t klen) { struct cp210x_buffer t; @@ -254,15 +309,15 @@ static size_t cp210x_usbstr_from_user(char *kbuf, size_t slen; if (copy_from_user(&t, ubuf, sizeof(t))) - return 0; - slen = (klen - 2) / 2; + return -EINVAL; + slen = klen / 2 - 1; if (!t.buf || !t.len || t.len > slen) - return 0; + return -EINVAL; if (t.len < slen) slen = t.len; - str = kbuf + klen - slen; + str = (char *)kbuf + klen - slen; if (copy_from_user(str, (__u8 __user *)t.buf, slen)) - return 0; + return -EINVAL; return make_usb_string(kbuf, klen, str, slen); } @@ -350,9 +405,9 @@ static inline int cp210x_setu16(struct usb_serial_port *port, int cmd, * Set a USB string descriptor using proprietary cp210x control messages. * Return the number of characters actually written. */ -static int cp210x_setstr(struct usb_serial_port *port, int cmd, char *usbstr) +static int cp210x_setstr(struct usb_serial_port *port, int cmd, __le16 *usbstr) { - unsigned len = usbstr[0]; + int len = ((u8 *)usbstr)[0]; int ret = cp210x_ctlmsg(port, 0xff, USB_TYPE_VENDOR, 0x3700 | (cmd & 0xff), 0, usbstr, len); dbg("%s - cmd 0x%02x len %d ret %d", __FUNCTION__, cmd, len, ret); @@ -682,7 +737,8 @@ static int cp210x_ioctl(struct usb_serial_port *port, struct file *file, if (cp210x_get_partnum(port) == CP210x_PART_CP2103) { u8 gpio = 0; if (!cp210x_gpioget(port, &gpio) && !copy_to_user( - (u8 __user *)arg, &gpio, sizeof(gpio))) + (__u8 __user *)arg, &gpio, + sizeof(gpio))) return 0; } return -EFAULT; @@ -755,11 +811,11 @@ static int cp210x_ioctl(struct usb_serial_port *port, struct file *file, case CP210x_IOCTL_SETPRODUCT: { - char usbstr[USBSTRLEN(CP210x_MAX_PRODUCT_STRLEN)]; + __le16 usbstr[CP210x_MAX_PRODUCT_STRLEN + 1]; size_t len = cp210x_usbstr_from_user(usbstr, (struct cp210x_buffer __user *)arg, sizeof(usbstr)); - if (len && cp210x_setstr(port, 0x03, usbstr) == len) + if (len > 0 && cp210x_setstr(port, 0x03, usbstr) == len) return 0; return -EFAULT; } @@ -767,11 +823,11 @@ static int cp210x_ioctl(struct usb_serial_port *port, struct file *file, case CP210x_IOCTL_SETSERIAL: { - char usbstr[USBSTRLEN(CP210x_MAX_SERIAL_STRLEN)]; + __le16 usbstr[CP210x_MAX_SERIAL_STRLEN + 1]; size_t len = cp210x_usbstr_from_user(usbstr, (struct cp210x_buffer __user *)arg, sizeof(usbstr)); - if (len && cp210x_setstr(port, 0x04, usbstr) == len) + if (len > 0 && cp210x_setstr(port, 0x04, usbstr) == len) return 0; return -EFAULT; } diff --git a/src/cp210x.c.karmic b/src/cp210x.c.karmic index 764b8bb..8532e2f 100644 --- a/src/cp210x.c.karmic +++ b/src/cp210x.c.karmic @@ -224,40 +224,94 @@ static struct usb_serial_driver cp210x_device = { #define CP210x_PART_CP2102 0x02 #define CP210x_PART_CP2103 0x03 -/* Return the size of the buffer needed to hold a string of len x formatted - * for send to CP210X, and its reverse. - */ -#define USBSTRLEN(strlen) (strlen * 2 + 2) +/* Taken from drivers/usb/gadget/usbstring.c */ +static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len) +{ + int count = 0; + u8 c; + u16 uchar; + + /* this insists on correct encodings, though not minimal ones. + * BUT it currently rejects legit 4-byte UTF-8 code points, + * which need surrogate pairs. (Unicode 3.1 can use them.) + */ + while (len != 0 && (c = (u8) *s++) != 0) { + if (unlikely(c & 0x80)) { + // 2-byte sequence: + // 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx + if ((c & 0xe0) == 0xc0) { + uchar = (c & 0x1f) << 6; + + c = (u8) *s++; + if ((c & 0xc0) != 0x80) + goto fail; + c &= 0x3f; + uchar |= c; + + // 3-byte sequence (most CJKV characters): + // zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx + } else if ((c & 0xf0) == 0xe0) { + uchar = (c & 0x0f) << 12; + + c = (u8) *s++; + if ((c & 0xc0) != 0x80) + goto fail; + c &= 0x3f; + uchar |= c << 6; + + c = (u8) *s++; + if ((c & 0xc0) != 0x80) + goto fail; + c &= 0x3f; + uchar |= c; + + /* no bogus surrogates */ + if (0xd800 <= uchar && uchar <= 0xdfff) + goto fail; + + // 4-byte sequence (surrogate pairs, currently rare): + // 11101110wwwwzzzzyy + 110111yyyyxxxxxx + // = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx + // (uuuuu = wwww + 1) + // FIXME accept the surrogate code points (only) + + } else + goto fail; + } else + uchar = c; + *cp++ = uchar; + count++; + len--; + } + return count; +fail: + return -1; +} -/* Populates usbstr with: (len) + (0x03) + unicode(str). Each char in str - * takes two bytes in unicode format. - * Returns the resulting length of the string in usbstr. +/* + * make_usb_string + * Populates usbstr with: (len) + (USB_DT_STRING) + unicode(str). Each char in + * usbstr consumes two bytes in unicode (__le16) format. + * Returns the resulting length of usbstr in bytes. * This function can accept overlapping usbstr and str as long as the overlap * does not cause data written to usbstr to overwrite data not yet read from * str. */ -static int make_usb_string(char *usbstr, size_t usblen, char *src, +static int make_usb_string(__le16* usbstr, size_t usblen, const char *src, size_t srclen) { - int len = 0; - - if (usbstr && usblen >= 2 && src && *src && srclen) { - char *p; - - if (usblen > 255) - usblen = 255; - - p = usbstr + 1; - *p++ = 0x03; - len = 2; - while (srclen && len < usblen) { - *p++ = *src++; - *p++ = 0; - len += 2; - srclen--; - } - *usbstr = (char)len; + int len; + + /* string descriptors have length, type, then UTF16-LE text */ + if (usblen > 254) + usblen = 254; + len = min(usblen / 2 - 1, srclen); + len = utf8_to_utf16le(src, usbstr + 1, len); + if (len >= 0) { + len = (len + 1) * 2; + ((u8 *)usbstr)[0] = len; + ((u8 *)usbstr)[1] = USB_DT_STRING; } return len; } @@ -268,7 +322,7 @@ static int make_usb_string(char *usbstr, size_t usblen, char *src, * is the size of the buffer at kbuf. * Returns the number of bytes used in kbuf. */ -static size_t cp210x_usbstr_from_user(char *kbuf, +static size_t cp210x_usbstr_from_user(__le16 *kbuf, struct cp210x_buffer __user *ubuf, size_t klen) { struct cp210x_buffer t; @@ -276,19 +330,18 @@ static size_t cp210x_usbstr_from_user(char *kbuf, size_t slen; if (copy_from_user(&t, ubuf, sizeof(t))) - return 0; - slen = (klen - 2) / 2; + return -EINVAL; + slen = klen / 2 - 1; if (!t.buf || !t.len || t.len > slen) - return 0; + return -EINVAL; if (t.len < slen) slen = t.len; - str = kbuf + klen - slen; + str = (char *)kbuf + klen - slen; if (copy_from_user(str, (__u8 __user *)t.buf, slen)) - return 0; + return -EINVAL; return make_usb_string(kbuf, klen, str, slen); } - /* * cp210x_ctlmsg * A generic usb control message interface. @@ -373,9 +426,9 @@ static inline int cp210x_setu16(struct usb_serial_port *port, int cmd, * Set a USB string descriptor using proprietary cp210x control messages. * Return the number of characters actually written. */ -static int cp210x_setstr(struct usb_serial_port *port, int cmd, char *usbstr) +static int cp210x_setstr(struct usb_serial_port *port, int cmd, __le16 *usbstr) { - unsigned len = usbstr[0]; + int len = ((u8 *)usbstr)[0]; int ret = cp210x_ctlmsg(port, 0xff, USB_TYPE_VENDOR, 0x3700 | (cmd & 0xff), 0, usbstr, len); dbg("%s - cmd 0x%02x len %d ret %d", __func__, cmd, len, ret); @@ -751,7 +804,8 @@ static int cp210x_ioctl(struct tty_struct *tty, struct file *file, if (cp210x_get_partnum(port) == CP210x_PART_CP2103) { u8 gpio = 0; if (!cp210x_gpioget(port, &gpio) && !copy_to_user( - (u8 __user *)arg, &gpio, sizeof(gpio))) + (__u8 __user *)arg, &gpio, + sizeof(gpio))) return 0; } return -EFAULT; @@ -824,11 +878,11 @@ static int cp210x_ioctl(struct tty_struct *tty, struct file *file, case CP210x_IOCTL_SETPRODUCT: { - char usbstr[USBSTRLEN(CP210x_MAX_PRODUCT_STRLEN)]; + __le16 usbstr[CP210x_MAX_PRODUCT_STRLEN + 1]; size_t len = cp210x_usbstr_from_user(usbstr, (struct cp210x_buffer __user *)arg, sizeof(usbstr)); - if (len && cp210x_setstr(port, 0x03, usbstr) == len) + if (len > 0 && cp210x_setstr(port, 0x03, usbstr) == len) return 0; return -EFAULT; } @@ -836,11 +890,11 @@ static int cp210x_ioctl(struct tty_struct *tty, struct file *file, case CP210x_IOCTL_SETSERIAL: { - char usbstr[USBSTRLEN(CP210x_MAX_SERIAL_STRLEN)]; + __le16 usbstr[CP210x_MAX_SERIAL_STRLEN + 1]; size_t len = cp210x_usbstr_from_user(usbstr, (struct cp210x_buffer __user *)arg, sizeof(usbstr)); - if (len && cp210x_setstr(port, 0x04, usbstr) == len) + if (len > 0 && cp210x_setstr(port, 0x04, usbstr) == len) return 0; return -EFAULT; } -- 2.39.2