Rename utf2ucs() to utf8_to_unicode(), and export it so it can be used elsewhere.

Similarly rename the version in mbprint.c, not because this affects anything
but just to keep the two copies in exact sync.  There was some discussion of
having only one copy in src/port/ instead, but this function is so small
and unlikely to change that doing so seems like overkill.

Slightly editorialized version of a patch by Joseph Adams.  (The bug-fix
aspect of his patch was applied separately, and back-patched.)
This commit is contained in:
Tom Lane 2010-08-18 19:54:01 +00:00
parent b5565bca11
commit 2d8314bd43
3 changed files with 22 additions and 17 deletions

View file

@ -1,7 +1,7 @@
/*
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.74 2010/01/04 20:38:31 adunstan Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.75 2010/08/18 19:54:01 tgl Exp $
*
*/
/* can be used in either frontend or backend */
@ -462,7 +462,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
* We return "1" for any leading byte that is either flat-out illegal or
* indicates a length larger than we support.
*
* pg_utf2wchar_with_len(), utf2ucs(), pg_utf8_islegal(), and perhaps
* pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
* other places would need to be fixed to change this.
*/
int
@ -632,13 +632,15 @@ ucs_wcwidth(pg_wchar ucs)
(ucs >= 0x20000 && ucs <= 0x2ffff)));
}
static pg_wchar
utf2ucs(const unsigned char *c)
/*
* Convert a UTF-8 character to a Unicode code point.
* This is a one-character version of pg_utf2wchar_with_len.
*
* No error checks here, c must point to a long-enough string.
*/
pg_wchar
utf8_to_unicode(const unsigned char *c)
{
/*
* one char version of pg_utf2wchar_with_len. no control here, c must
* point to a large enough string
*/
if ((*c & 0x80) == 0)
return (pg_wchar) c[0];
else if ((*c & 0xe0) == 0xc0)
@ -661,7 +663,7 @@ utf2ucs(const unsigned char *c)
static int
pg_utf_dsplen(const unsigned char *s)
{
return ucs_wcwidth(utf2ucs(s));
return ucs_wcwidth(utf8_to_unicode(s));
}
/*

View file

@ -3,7 +3,7 @@
*
* Copyright (c) 2000-2010, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.39 2010/08/16 00:06:18 tgl Exp $
* $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.40 2010/08/18 19:54:01 tgl Exp $
*
* XXX this file does not really belong in psql/. Perhaps move to libpq?
* It also seems that the mbvalidate function is redundant with existing
@ -43,13 +43,15 @@ pg_get_utf8_id(void)
#define PG_UTF8 pg_get_utf8_id()
/*
* Convert a UTF-8 character to a Unicode code point.
* This is a one-character version of pg_utf2wchar_with_len.
*
* No error checks here, c must point to a long-enough string.
*/
static pg_wchar
utf2ucs(const unsigned char *c)
utf8_to_unicode(const unsigned char *c)
{
/*
* one char version of pg_utf2wchar_with_len. no control here, c must
* point to a large enough string
*/
if ((*c & 0x80) == 0)
return (pg_wchar) c[0];
else if ((*c & 0xe0) == 0xc0)
@ -346,7 +348,7 @@ pg_wcsformat(unsigned char *pwcs, size_t len, int encoding,
else if (w < 0) /* Non-ascii control char */
{
if (encoding == PG_UTF8)
sprintf((char *) ptr, "\\u%04X", utf2ucs(pwcs));
sprintf((char *) ptr, "\\u%04X", utf8_to_unicode(pwcs));
else
{
/*

View file

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.94 2010/02/26 02:01:25 momjian Exp $
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.95 2010/08/18 19:54:01 tgl Exp $
*
* NOTES
* This is used both by the backend and by libpq, but should not be
@ -412,6 +412,7 @@ extern int pg_valid_client_encoding(const char *name);
extern int pg_valid_server_encoding(const char *name);
extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string);
extern pg_wchar utf8_to_unicode(const unsigned char *c);
extern int pg_utf_mblen(const unsigned char *);
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
int src_encoding,