NLS: improve UTF8 -> UTF16 string conversion routine
The utf8s_to_utf16s conversion routine needs to be improved. Unlike its utf16s_to_utf8s sibling, it doesn't accept arguments specifying the maximum length of the output buffer or the endianness of its 16-bit output.

This patch (as1501) adds the two missing arguments, and adjusts the only two places in the kernel where the function is called. A follow-on patch will add a third caller that does utilize the new capabilities.

The two conversion routines are still annoyingly inconsistent in the way they handle invalid byte combinations. But that's a subject for a different patch.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
b7463c71fb
commit
0720a06a75
@@ -212,11 +212,13 @@ kvp_respond_to_host(char *key, char *value, int error)
|
|||||||
* The windows host expects the key/value pair to be encoded
|
* The windows host expects the key/value pair to be encoded
|
||||||
* in utf16.
|
* in utf16.
|
||||||
*/
|
*/
|
||||||
keylen = utf8s_to_utf16s(key_name, strlen(key_name),
|
keylen = utf8s_to_utf16s(key_name, strlen(key_name), UTF16_HOST_ENDIAN,
|
||||||
(wchar_t *)kvp_data->data.key);
|
(wchar_t *) kvp_data->data.key,
|
||||||
|
HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2);
|
||||||
kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */
|
kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */
|
||||||
valuelen = utf8s_to_utf16s(value, strlen(value),
|
valuelen = utf8s_to_utf16s(value, strlen(value), UTF16_HOST_ENDIAN,
|
||||||
(wchar_t *)kvp_data->data.value);
|
(wchar_t *) kvp_data->data.value,
|
||||||
|
HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2);
|
||||||
kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */
|
kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */
|
||||||
|
|
||||||
kvp_data->data.value_type = REG_SZ; /* all our values are strings */
|
kvp_data->data.value_type = REG_SZ; /* all our values are strings */
|
||||||
|
@@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
|
|||||||
int charlen;
|
int charlen;
|
||||||
|
|
||||||
if (utf8) {
|
if (utf8) {
|
||||||
*outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
|
*outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
|
||||||
|
(wchar_t *) outname, FAT_LFN_LEN + 2);
|
||||||
if (*outlen < 0)
|
if (*outlen < 0)
|
||||||
return *outlen;
|
return *outlen;
|
||||||
else if (*outlen > FAT_LFN_LEN)
|
else if (*outlen > FAT_LFN_LEN)
|
||||||
|
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(utf32_to_utf8);
|
EXPORT_SYMBOL(utf32_to_utf8);
|
||||||
|
|
||||||
int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
|
static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
|
||||||
|
{
|
||||||
|
switch (endian) {
|
||||||
|
default:
|
||||||
|
*s = (wchar_t) c;
|
||||||
|
break;
|
||||||
|
case UTF16_LITTLE_ENDIAN:
|
||||||
|
*s = __cpu_to_le16(c);
|
||||||
|
break;
|
||||||
|
case UTF16_BIG_ENDIAN:
|
||||||
|
*s = __cpu_to_be16(c);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
|
||||||
|
wchar_t *pwcs, int maxlen)
|
||||||
{
|
{
|
||||||
u16 *op;
|
u16 *op;
|
||||||
int size;
|
int size;
|
||||||
unicode_t u;
|
unicode_t u;
|
||||||
|
|
||||||
op = pwcs;
|
op = pwcs;
|
||||||
while (*s && len > 0) {
|
while (len > 0 && maxlen > 0 && *s) {
|
||||||
if (*s & 0x80) {
|
if (*s & 0x80) {
|
||||||
size = utf8_to_utf32(s, len, &u);
|
size = utf8_to_utf32(s, len, &u);
|
||||||
if (size < 0)
|
if (size < 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (u >= PLANE_SIZE) {
|
|
||||||
u -= PLANE_SIZE;
|
|
||||||
*op++ = (wchar_t) (SURROGATE_PAIR |
|
|
||||||
((u >> 10) & SURROGATE_BITS));
|
|
||||||
*op++ = (wchar_t) (SURROGATE_PAIR |
|
|
||||||
SURROGATE_LOW |
|
|
||||||
(u & SURROGATE_BITS));
|
|
||||||
} else {
|
|
||||||
*op++ = (wchar_t) u;
|
|
||||||
}
|
|
||||||
s += size;
|
s += size;
|
||||||
len -= size;
|
len -= size;
|
||||||
|
|
||||||
|
if (u >= PLANE_SIZE) {
|
||||||
|
if (maxlen < 2)
|
||||||
|
break;
|
||||||
|
u -= PLANE_SIZE;
|
||||||
|
put_utf16(op++, SURROGATE_PAIR |
|
||||||
|
((u >> 10) & SURROGATE_BITS),
|
||||||
|
endian);
|
||||||
|
put_utf16(op++, SURROGATE_PAIR |
|
||||||
|
SURROGATE_LOW |
|
||||||
|
(u & SURROGATE_BITS),
|
||||||
|
endian);
|
||||||
|
maxlen -= 2;
|
||||||
} else {
|
} else {
|
||||||
*op++ = *s++;
|
put_utf16(op++, u, endian);
|
||||||
|
maxlen--;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
put_utf16(op++, *s++, endian);
|
||||||
len--;
|
len--;
|
||||||
|
maxlen--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return op - pwcs;
|
return op - pwcs;
|
||||||
|
@@ -43,7 +43,7 @@ enum utf16_endian {
|
|||||||
UTF16_BIG_ENDIAN
|
UTF16_BIG_ENDIAN
|
||||||
};
|
};
|
||||||
|
|
||||||
/* nls.c */
|
/* nls_base.c */
|
||||||
extern int register_nls(struct nls_table *);
|
extern int register_nls(struct nls_table *);
|
||||||
extern int unregister_nls(struct nls_table *);
|
extern int unregister_nls(struct nls_table *);
|
||||||
extern struct nls_table *load_nls(char *);
|
extern struct nls_table *load_nls(char *);
|
||||||
@@ -52,7 +52,8 @@ extern struct nls_table *load_nls_default(void);
|
|||||||
|
|
||||||
extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
|
extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
|
||||||
extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
|
extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
|
||||||
extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
|
extern int utf8s_to_utf16s(const u8 *s, int len,
|
||||||
|
enum utf16_endian endian, wchar_t *pwcs, int maxlen);
|
||||||
extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
|
extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
|
||||||
enum utf16_endian endian, u8 *s, int maxlen);
|
enum utf16_endian endian, u8 *s, int maxlen);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user