[SystemZ][z/OS] ASCII/EBCDIC support with no coexistence
The aim of this patch is to break up the larger patch (https://reviews.llvm.org/D111323) into smaller pieces that are easier to review upstream. In particular, this patch adds the character-encoding-sensitive changes but, unlike the original patch, does not use inline namespaces. The use of namespaces to build both versions of the library, and the localization of error messages, will follow in a subsequent patch. Differential Revision: https://reviews.llvm.org/D114813
This commit is contained in:
parent
dac82b53e2
commit
a1da73961d
|
@ -467,6 +467,9 @@ include(HandleLibcxxFlags)
|
|||
# These flags get added to CMAKE_CXX_FLAGS and CMAKE_C_FLAGS so that
|
||||
# 'config-ix' use them during feature checks. It also adds them to both
|
||||
# 'LIBCXX_COMPILE_FLAGS' and 'LIBCXX_LINK_FLAGS'
|
||||
if(ZOS)
|
||||
add_target_flags_if_supported("-fzos-le-char-mode=ebcdic")
|
||||
endif()
|
||||
if(LIBCXX_TARGET_TRIPLE)
|
||||
add_target_flags_if_supported("--target=${LIBCXX_TARGET_TRIPLE}")
|
||||
endif()
|
||||
|
|
|
@ -257,6 +257,10 @@
|
|||
# endif // defined(__GLIBC_PREREQ)
|
||||
#endif // defined(__linux__)
|
||||
|
||||
#if defined(__MVS__)
|
||||
# include <features.h> // for __NATIVE_ASCII_F
|
||||
#endif
|
||||
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
# if __LITTLE_ENDIAN__
|
||||
# define _LIBCPP_LITTLE_ENDIAN
|
||||
|
@ -1220,8 +1224,8 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
|
|||
#endif
|
||||
|
||||
#if defined(__BIONIC__) || defined(__NuttX__) || \
|
||||
defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) || \
|
||||
defined(__MVS__) || defined(__OpenBSD__)
|
||||
defined(__Fuchsia__) || defined(__wasi__) || \
|
||||
defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__)
|
||||
#define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE
|
||||
#endif
|
||||
|
||||
|
|
|
@ -511,6 +511,33 @@ public:
|
|||
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT
|
||||
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA
|
||||
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT
|
||||
#elif defined(__MVS__)
|
||||
static const mask __regex_word = 0x8000;
|
||||
# if defined(__NATIVE_ASCII_F)`
|
||||
typedef unsigned int mask;
|
||||
static const mask space = _ISSPACE_A;
|
||||
static const mask print = _ISPRINT_A;
|
||||
static const mask cntrl = _ISCNTRL_A;
|
||||
static const mask upper = _ISUPPER_A;
|
||||
static const mask lower = _ISLOWER_A;
|
||||
static const mask alpha = _ISALPHA_A;
|
||||
static const mask digit = _ISDIGIT_A;
|
||||
static const mask punct = _ISPUNCT_A;
|
||||
static const mask xdigit = _ISXDIGIT_A;
|
||||
static const mask blank = _ISBLANK_A;
|
||||
# else
|
||||
typedef unsigned short mask;
|
||||
static const mask space = __ISSPACE;
|
||||
static const mask print = __ISPRINT;
|
||||
static const mask cntrl = __ISCNTRL;
|
||||
static const mask upper = __ISUPPER;
|
||||
static const mask lower = __ISLOWER;
|
||||
static const mask alpha = __ISALPHA;
|
||||
static const mask digit = __ISDIGIT;
|
||||
static const mask punct = __ISPUNCT;
|
||||
static const mask xdigit = __ISXDIGIT;
|
||||
static const mask blank = __ISBLANK;
|
||||
# endif
|
||||
#else
|
||||
# error unknown rune table for this platform -- do you mean to define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE?
|
||||
#endif
|
||||
|
@ -734,6 +761,10 @@ public:
|
|||
static const short* __classic_upper_table() _NOEXCEPT;
|
||||
static const short* __classic_lower_table() _NOEXCEPT;
|
||||
#endif
|
||||
#if defined(__MVS__)
|
||||
static const unsigned short* __classic_upper_table() _NOEXCEPT;
|
||||
static const unsigned short* __classic_lower_table() _NOEXCEPT;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
~ctype();
|
||||
|
|
|
@ -1310,19 +1310,51 @@ regex_traits<_CharT>::isctype(char_type __c, char_class_type __m) const
|
|||
return (__c == '_' && (__m & __regex_word));
|
||||
}
|
||||
|
||||
// Returns true iff __c is one of the octal digits '0'..'7' in the execution
// character set.
//
// In both supported encodings these eight digits occupy an aligned run of
// eight code points ('0' is 0x30 in ASCII and 0xF0 in EBCDIC), so masking off
// the low three bits and comparing against the code of '0' is sufficient.
//
// The parameter uses a reserved (double-underscore) name so that a user macro
// cannot clobber it when this header is included.
inline _LIBCPP_INLINE_VISIBILITY
bool __is_07(unsigned char __c)
{
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
    return (__c & 0xF8u) == 0xF0; // EBCDIC: '0'..'7' == 0xF0..0xF7
#else
    return (__c & 0xF8u) == 0x30; // ASCII:  '0'..'7' == 0x30..0x37
#endif
}
|
||||
|
||||
// Returns true iff __c is the digit '8' or '9' in the execution character set.
//
// '8' and '9' are adjacent and '8' has an even code in both supported
// encodings (0x38 in ASCII, 0xF8 in EBCDIC), so clearing the lowest bit and
// comparing against the code of '8' matches exactly those two characters.
//
// The parameter uses a reserved (double-underscore) name so that a user macro
// cannot clobber it when this header is included.
inline _LIBCPP_INLINE_VISIBILITY
bool __is_89(unsigned char __c)
{
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
    return (__c & 0xFEu) == 0xF8; // EBCDIC: '8','9' == 0xF8,0xF9
#else
    return (__c & 0xFEu) == 0x38; // ASCII:  '8','9' == 0x38,0x39
#endif
}
|
||||
|
||||
// Folds an upper-case letter to lower case with a single bit operation.
//
// No check is made that __c is actually a letter: the bit is cleared (EBCDIC)
// or set (ASCII) unconditionally, so non-letter values may be altered as well.
// Callers are expected to range-check the result afterwards.
//
//   EBCDIC: clearing 0x40 maps 'A' (0xC1) to 'a' (0x81).
//   ASCII:  setting  0x20 maps 'A' (0x41) to 'a' (0x61).
//
// The parameter uses a reserved (double-underscore) name so that a user macro
// cannot clobber it when this header is included.
inline _LIBCPP_INLINE_VISIBILITY
unsigned char __to_lower(unsigned char __c)
{
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
    return __c & 0xBF;
#else
    return __c | 0x20;
#endif
}
|
||||
|
||||
template <class _CharT>
|
||||
int
|
||||
regex_traits<_CharT>::__regex_traits_value(unsigned char __ch, int __radix)
|
||||
{
|
||||
if ((__ch & 0xF8u) == 0x30) // '0' <= __ch && __ch <= '7'
|
||||
if (__is_07(__ch)) // '0' <= __ch && __ch <= '7'
|
||||
return __ch - '0';
|
||||
if (__radix != 8)
|
||||
{
|
||||
if ((__ch & 0xFEu) == 0x38) // '8' <= __ch && __ch <= '9'
|
||||
if (__is_89(__ch)) // '8' <= __ch && __ch <= '9'
|
||||
return __ch - '0';
|
||||
if (__radix == 16)
|
||||
{
|
||||
__ch |= 0x20; // tolower
|
||||
__ch = __to_lower(__ch); // tolower
|
||||
if ('a' <= __ch && __ch <= 'f')
|
||||
return __ch - ('a' - 10);
|
||||
}
|
||||
|
|
|
@ -898,7 +898,7 @@ ctype<wchar_t>::do_toupper(char_type c) const
|
|||
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
|
||||
return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
|
||||
defined(__NetBSD__)
|
||||
defined(__NetBSD__) || defined(__MVS__)
|
||||
return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
|
||||
#else
|
||||
return (isascii(c) && iswlower_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'a'+L'A' : c;
|
||||
|
@ -912,7 +912,7 @@ ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const
|
|||
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
|
||||
*low = isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
|
||||
defined(__NetBSD__)
|
||||
defined(__NetBSD__) || defined(__MVS__)
|
||||
*low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low]
|
||||
: *low;
|
||||
#else
|
||||
|
@ -927,7 +927,7 @@ ctype<wchar_t>::do_tolower(char_type c) const
|
|||
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
|
||||
return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
|
||||
defined(__NetBSD__)
|
||||
defined(__NetBSD__) || defined(__MVS__)
|
||||
return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
|
||||
#else
|
||||
return (isascii(c) && isupper_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'A'+'a' : c;
|
||||
|
@ -941,7 +941,7 @@ ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const
|
|||
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
|
||||
*low = isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
|
||||
defined(__NetBSD__)
|
||||
defined(__NetBSD__) || defined(__MVS__)
|
||||
*low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low]
|
||||
: *low;
|
||||
#else
|
||||
|
@ -1013,7 +1013,7 @@ ctype<char>::do_toupper(char_type c) const
|
|||
static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
|
||||
#elif defined(__NetBSD__)
|
||||
return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
|
||||
return isascii(c) ?
|
||||
static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
|
||||
#else
|
||||
|
@ -1030,7 +1030,7 @@ ctype<char>::do_toupper(char_type* low, const char_type* high) const
|
|||
static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low;
|
||||
#elif defined(__NetBSD__)
|
||||
*low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
|
||||
*low = isascii(*low) ?
|
||||
static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
|
||||
#else
|
||||
|
@ -1047,7 +1047,7 @@ ctype<char>::do_tolower(char_type c) const
|
|||
static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
|
||||
#elif defined(__NetBSD__)
|
||||
return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
|
||||
return isascii(c) ?
|
||||
static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
|
||||
#else
|
||||
|
@ -1063,7 +1063,7 @@ ctype<char>::do_tolower(char_type* low, const char_type* high) const
|
|||
*low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low;
|
||||
#elif defined(__NetBSD__)
|
||||
*low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
|
||||
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
|
||||
*low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
|
||||
#else
|
||||
*low = (isascii(*low) && isupper_l(*low, _LIBCPP_GET_C_LOCALE)) ? *low-'A'+'a' : *low;
|
||||
|
@ -1211,6 +1211,12 @@ ctype<char>::classic_table() noexcept
|
|||
return _ctype_ + 1;
|
||||
#elif defined(_AIX)
|
||||
return (const unsigned int *)__lc_ctype_ptr->obj->mask;
|
||||
#elif defined(__MVS__)
|
||||
# if defined(__NATIVE_ASCII_F)
|
||||
return const_cast<const ctype<char>::mask*> (__OBJ_DATA(__lc_ctype_a)->mask);
|
||||
# else
|
||||
return const_cast<const ctype<char>::mask*> (__ctypec);
|
||||
# endif
|
||||
#else
|
||||
// Platform not supported: abort so the person doing the port knows what to
|
||||
// fix
|
||||
|
@ -1259,7 +1265,26 @@ ctype<char>::__classic_upper_table() noexcept
|
|||
{
|
||||
return *__ctype_toupper_loc();
|
||||
}
|
||||
#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__
|
||||
#elif defined(__MVS__)
|
||||
const unsigned short*
|
||||
ctype<char>::__classic_lower_table() _NOEXCEPT
|
||||
{
|
||||
# if defined(__NATIVE_ASCII_F)
|
||||
return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->lower);
|
||||
# else
|
||||
return const_cast<const unsigned short*>(__ctype + __TOLOWER_INDEX);
|
||||
# endif
|
||||
}
|
||||
const unsigned short *
|
||||
ctype<char>::__classic_upper_table() _NOEXCEPT
|
||||
{
|
||||
# if defined(__NATIVE_ASCII_F)
|
||||
return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->upper);
|
||||
# else
|
||||
return const_cast<const unsigned short*>(__ctype + __TOUPPER_INDEX);
|
||||
# endif
|
||||
}
|
||||
#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__ || __MVS__
|
||||
|
||||
// template <> class ctype_byname<char>
|
||||
|
||||
|
|
|
@ -76,6 +76,125 @@ struct collationnames
|
|||
char char_;
|
||||
};
|
||||
|
||||
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
|
||||
// EBCDIC IBM-1047
|
||||
// Entries are sorted by name using the EBCDIC collating sequence, in which
// lowercase letters sort before uppercase letters.
|
||||
const collationnames collatenames[] =
|
||||
{
|
||||
{"a", 0x81},
|
||||
{"alert", 0x2f},
|
||||
{"ampersand", 0x50},
|
||||
{"apostrophe", 0x7d},
|
||||
{"asterisk", 0x5c},
|
||||
{"b", 0x82},
|
||||
{"backslash", 0xe0},
|
||||
{"backspace", 0x16},
|
||||
{"c", 0x83},
|
||||
{"carriage-return", 0xd},
|
||||
{"circumflex", 0x5f},
|
||||
{"circumflex-accent", 0x5f},
|
||||
{"colon", 0x7a},
|
||||
{"comma", 0x6b},
|
||||
{"commercial-at", 0x7c},
|
||||
{"d", 0x84},
|
||||
{"dollar-sign", 0x5b},
|
||||
{"e", 0x85},
|
||||
{"eight", 0xf8},
|
||||
{"equals-sign", 0x7e},
|
||||
{"exclamation-mark", 0x5a},
|
||||
{"f", 0x86},
|
||||
{"five", 0xf5},
|
||||
{"form-feed", 0xc},
|
||||
{"four", 0xf4},
|
||||
{"full-stop", 0x4b},
|
||||
{"g", 0x87},
|
||||
{"grave-accent", 0x79},
|
||||
{"greater-than-sign", 0x6e},
|
||||
{"h", 0x88},
|
||||
{"hyphen", 0x60},
|
||||
{"hyphen-minus", 0x60},
|
||||
{"i", 0x89},
|
||||
{"j", 0x91},
|
||||
{"k", 0x92},
|
||||
{"l", 0x93},
|
||||
{"left-brace", 0xc0},
|
||||
{"left-curly-bracket", 0xc0},
|
||||
{"left-parenthesis", 0x4d},
|
||||
{"left-square-bracket", 0xad},
|
||||
{"less-than-sign", 0x4c},
|
||||
{"low-line", 0x6d},
|
||||
{"m", 0x94},
|
||||
{"n", 0x95},
|
||||
{"newline", 0x15},
|
||||
{"nine", 0xf9},
|
||||
{"number-sign", 0x7b},
|
||||
{"o", 0x96},
|
||||
{"one", 0xf1},
|
||||
{"p", 0x97},
|
||||
{"percent-sign", 0x6c},
|
||||
{"period", 0x4b},
|
||||
{"plus-sign", 0x4e},
|
||||
{"q", 0x98},
|
||||
{"question-mark", 0x6f},
|
||||
{"quotation-mark", 0x7f},
|
||||
{"r", 0x99},
|
||||
{"reverse-solidus", 0xe0},
|
||||
{"right-brace", 0xd0},
|
||||
{"right-curly-bracket", 0xd0},
|
||||
{"right-parenthesis", 0x5d},
|
||||
{"right-square-bracket", 0xbd},
|
||||
{"s", 0xa2},
|
||||
{"semicolon", 0x5e},
|
||||
{"seven", 0xf7},
|
||||
{"six", 0xf6},
|
||||
{"slash", 0x61},
|
||||
{"solidus", 0x61},
|
||||
{"space", 0x40},
|
||||
{"t", 0xa3},
|
||||
{"tab", 0x5},
|
||||
{"three", 0xf3},
|
||||
{"tilde", 0xa1},
|
||||
{"two", 0xf2},
|
||||
{"u", 0xa4},
|
||||
{"underscore", 0x6d},
|
||||
{"v", 0xa5},
|
||||
{"vertical-line", 0x4f},
|
||||
{"vertical-tab", 0xb},
|
||||
{"w", 0xa6},
|
||||
{"x", 0xa7},
|
||||
{"y", 0xa8},
|
||||
{"z", 0xa9},
|
||||
{"zero", 0xf0},
|
||||
{"A", 0xc1},
|
||||
{"B", 0xc2},
|
||||
{"C", 0xc3},
|
||||
{"D", 0xc4},
|
||||
{"E", 0xc5},
|
||||
{"F", 0xc6},
|
||||
{"G", 0xc7},
|
||||
{"H", 0xc8},
|
||||
{"I", 0xc9},
|
||||
{"J", 0xd1},
|
||||
{"K", 0xd2},
|
||||
{"L", 0xd3},
|
||||
{"M", 0xd4},
|
||||
{"N", 0xd5},
|
||||
{"NUL", 0},
|
||||
{"O", 0xd6},
|
||||
{"P", 0xd7},
|
||||
{"Q", 0xd8},
|
||||
{"R", 0xd9},
|
||||
{"S", 0xe2},
|
||||
{"T", 0xe3},
|
||||
{"U", 0xe4},
|
||||
{"V", 0xe5},
|
||||
{"W", 0xe6},
|
||||
{"X", 0xe7},
|
||||
{"Y", 0xe8},
|
||||
{"Z", 0xe9}
|
||||
};
|
||||
#else
|
||||
// ASCII
|
||||
const collationnames collatenames[] =
|
||||
{
|
||||
{"A", 0x41},
|
||||
|
@ -190,6 +309,7 @@ const collationnames collatenames[] =
|
|||
{"z", 0x7a},
|
||||
{"zero", 0x30}
|
||||
};
|
||||
#endif
|
||||
|
||||
struct classnames
|
||||
{
|
||||
|
|
|
@ -267,6 +267,9 @@ include(HandleLibcxxabiFlags)
|
|||
#===============================================================================
|
||||
|
||||
# Configure target flags
|
||||
if(ZOS)
|
||||
add_target_flags_if_supported("-fzos-le-char-mode=ebcdic")
|
||||
endif()
|
||||
if(LIBCXXABI_TARGET_TRIPLE)
|
||||
add_target_flags_if_supported("--target=${LIBCXXABI_TARGET_TRIPLE}")
|
||||
endif()
|
||||
|
|
Loading…
Reference in a new issue