[SystemZ][z/OS] ASCII/EBCDIC support with no coexistence

The aim of this patch is to break up the larger patch (https://reviews.llvm.org/D111323) to be more upstream-friendly. In particular, this patch adds the char-encoding-sensitive changes but does not use inline namespaces as before. The use of namespaces to build both versions of the library and the localization of error messages will follow in a subsequent patch.

Differential Revision: https://reviews.llvm.org/D114813
This commit is contained in:
Muiez Ahmed 2022-01-14 11:35:53 -05:00
parent dac82b53e2
commit a1da73961d
7 changed files with 232 additions and 14 deletions

View file

@ -467,6 +467,9 @@ include(HandleLibcxxFlags)
# These flags get added to CMAKE_CXX_FLAGS and CMAKE_C_FLAGS so that
# 'config-ix' use them during feature checks. It also adds them to both
# 'LIBCXX_COMPILE_FLAGS' and 'LIBCXX_LINK_FLAGS'
# z/OS only: request the EBCDIC char mode from the compiler. The flag goes
# through add_target_flags_if_supported, so it is presumably dropped when the
# compiler does not accept it (confirm against HandleLibcxxFlags).
if(ZOS)
add_target_flags_if_supported("-fzos-le-char-mode=ebcdic")
endif()
if(LIBCXX_TARGET_TRIPLE)
add_target_flags_if_supported("--target=${LIBCXX_TARGET_TRIPLE}")
endif()

View file

@ -257,6 +257,10 @@
# endif // defined(__GLIBC_PREREQ)
#endif // defined(__linux__)
#if defined(__MVS__)
# include <features.h> // for __NATIVE_ASCII_F
#endif
#ifdef __LITTLE_ENDIAN__
# if __LITTLE_ENDIAN__
# define _LIBCPP_LITTLE_ENDIAN
@ -1220,8 +1224,8 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
#endif
#if defined(__BIONIC__) || defined(__NuttX__) || \
defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) || \
defined(__MVS__) || defined(__OpenBSD__)
defined(__Fuchsia__) || defined(__wasi__) || \
defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__)
#define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE
#endif

View file

@ -511,6 +511,33 @@ public:
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT
#elif defined(__MVS__)
// z/OS: the classification bits are taken from the system's ctype macros.
// Two families exist and are selected by __NATIVE_ASCII_F: the `_IS*_A`
// macros with an `unsigned int` mask when it is defined (ASCII), and the
// `__IS*` macros with an `unsigned short` mask otherwise (EBCDIC).
# if defined(__NATIVE_ASCII_F)
typedef unsigned int mask;
static const mask space = _ISSPACE_A;
static const mask print = _ISPRINT_A;
static const mask cntrl = _ISCNTRL_A;
static const mask upper = _ISUPPER_A;
static const mask lower = _ISLOWER_A;
static const mask alpha = _ISALPHA_A;
static const mask digit = _ISDIGIT_A;
static const mask punct = _ISPUNCT_A;
static const mask xdigit = _ISXDIGIT_A;
static const mask blank = _ISBLANK_A;
# else
typedef unsigned short mask;
static const mask space = __ISSPACE;
static const mask print = __ISPRINT;
static const mask cntrl = __ISCNTRL;
static const mask upper = __ISUPPER;
static const mask lower = __ISLOWER;
static const mask alpha = __ISALPHA;
static const mask digit = __ISDIGIT;
static const mask punct = __ISPUNCT;
static const mask xdigit = __ISXDIGIT;
static const mask blank = __ISBLANK;
# endif
// Must follow the typedefs: `mask` is only declared inside the branches above.
static const mask __regex_word = 0x8000;
#else
# error unknown rune table for this platform -- do you mean to define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE?
#endif
@ -734,6 +761,10 @@ public:
static const short* __classic_upper_table() _NOEXCEPT;
static const short* __classic_lower_table() _NOEXCEPT;
#endif
#if defined(__MVS__)
// z/OS variants of the classic case-mapping table accessors. They return
// tables of unsigned short, unlike the `const short*` accessors declared
// just above for other platforms.
static const unsigned short* __classic_upper_table() _NOEXCEPT;
static const unsigned short* __classic_lower_table() _NOEXCEPT;
#endif
protected:
~ctype();

View file

@ -1310,19 +1310,51 @@ regex_traits<_CharT>::isctype(char_type __c, char_class_type __m) const
return (__c == '_' && (__m & __regex_word));
}
// Returns true when __c is one of the octal digits '0'..'7' in the execution
// character set. The eight digits form an aligned run of eight code points
// (0x30..0x37 in ASCII, 0xF0..0xF7 in EBCDIC), so clearing the low three bits
// and comparing with the run's base value is sufficient.
//
// The parameter uses a reserved identifier so that a user-defined macro named
// `c` cannot break this header.
inline _LIBCPP_INLINE_VISIBILITY
bool __is_07(unsigned char __c)
{
    return (__c & 0xF8u) ==
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
        0xF0;
#else
        0x30;
#endif
}
// Returns true when __c is the digit '8' or '9' in the execution character
// set. The two digits are adjacent and start on an even code point
// (0x38/0x39 in ASCII, 0xF8/0xF9 in EBCDIC), so clearing the lowest bit and
// comparing with the code point of '8' is sufficient.
//
// The parameter uses a reserved identifier so that a user-defined macro named
// `c` cannot break this header.
inline _LIBCPP_INLINE_VISIBILITY
bool __is_89(unsigned char __c)
{
    return (__c & 0xFEu) ==
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
        0xF8;
#else
        0x38;
#endif
}
// Folds a letter to lower case with a single bit operation.
//   - ASCII:  lower case is upper case with bit 0x20 set    ('A' 0x41 -> 'a' 0x61).
//   - EBCDIC: lower case is upper case with bit 0x40 cleared ('A' 0xC1 -> 'a' 0x81).
// No check is made that __c is a letter, so other code points may be altered;
// the caller is expected to range-check the result afterwards.
//
// The parameter uses a reserved identifier so that a user-defined macro named
// `c` cannot break this header.
inline _LIBCPP_INLINE_VISIBILITY
unsigned char __to_lower(unsigned char __c)
{
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
    return __c & 0xBF;
#else
    return __c | 0x20;
#endif
}
template <class _CharT>
int
regex_traits<_CharT>::__regex_traits_value(unsigned char __ch, int __radix)
{
if ((__ch & 0xF8u) == 0x30) // '0' <= __ch && __ch <= '7'
if (__is_07(__ch)) // '0' <= __ch && __ch <= '7'
return __ch - '0';
if (__radix != 8)
{
if ((__ch & 0xFEu) == 0x38) // '8' <= __ch && __ch <= '9'
if (__is_89(__ch)) // '8' <= __ch && __ch <= '9'
return __ch - '0';
if (__radix == 16)
{
__ch |= 0x20; // tolower
__ch = __to_lower(__ch); // tolower
if ('a' <= __ch && __ch <= 'f')
return __ch - ('a' - 10);
}

View file

@ -898,7 +898,7 @@ ctype<wchar_t>::do_toupper(char_type c) const
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
defined(__NetBSD__)
defined(__NetBSD__) || defined(__MVS__)
return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
#else
return (isascii(c) && iswlower_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'a'+L'A' : c;
@ -912,7 +912,7 @@ ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
*low = isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
defined(__NetBSD__)
defined(__NetBSD__) || defined(__MVS__)
*low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low]
: *low;
#else
@ -927,7 +927,7 @@ ctype<wchar_t>::do_tolower(char_type c) const
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
defined(__NetBSD__)
defined(__NetBSD__) || defined(__MVS__)
return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
#else
return (isascii(c) && isupper_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'A'+'a' : c;
@ -941,7 +941,7 @@ ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
*low = isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
defined(__NetBSD__)
defined(__NetBSD__) || defined(__MVS__)
*low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low]
: *low;
#else
@ -1013,7 +1013,7 @@ ctype<char>::do_toupper(char_type c) const
static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
#elif defined(__NetBSD__)
return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
return isascii(c) ?
static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
#else
@ -1030,7 +1030,7 @@ ctype<char>::do_toupper(char_type* low, const char_type* high) const
static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low;
#elif defined(__NetBSD__)
*low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
*low = isascii(*low) ?
static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
#else
@ -1047,7 +1047,7 @@ ctype<char>::do_tolower(char_type c) const
static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
#elif defined(__NetBSD__)
return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
return isascii(c) ?
static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
#else
@ -1063,7 +1063,7 @@ ctype<char>::do_tolower(char_type* low, const char_type* high) const
*low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low;
#elif defined(__NetBSD__)
*low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
*low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
#else
*low = (isascii(*low) && isupper_l(*low, _LIBCPP_GET_C_LOCALE)) ? *low-'A'+'a' : *low;
@ -1211,6 +1211,12 @@ ctype<char>::classic_table() noexcept
return _ctype_ + 1;
#elif defined(_AIX)
return (const unsigned int *)__lc_ctype_ptr->obj->mask;
#elif defined(__MVS__)
# if defined(__NATIVE_ASCII_F)
return const_cast<const ctype<char>::mask*> (__OBJ_DATA(__lc_ctype_a)->mask);
# else
return const_cast<const ctype<char>::mask*> (__ctypec);
# endif
#else
// Platform not supported: abort so the person doing the port knows what to
// fix
@ -1259,7 +1265,26 @@ ctype<char>::__classic_upper_table() noexcept
{
return *__ctype_toupper_loc();
}
#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__
#elif defined(__MVS__)
// z/OS: returns the table used by the ctype<char>/ctype<wchar_t> tolower
// paths, which index it by character value.
//   - __NATIVE_ASCII_F defined: the `lower` member of the __lc_ctype_a object.
//   - otherwise: the region of __ctype starting at __TOLOWER_INDEX.
// NOTE(review): both sources come from the z/OS system headers; their exact
// element types are not visible here, hence the const_cast is left untouched.
const unsigned short*
ctype<char>::__classic_lower_table() _NOEXCEPT
{
# if defined(__NATIVE_ASCII_F)
return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->lower);
# else
return const_cast<const unsigned short*>(__ctype + __TOLOWER_INDEX);
# endif
}
// z/OS: returns the table used by the ctype<char>/ctype<wchar_t> toupper
// paths, which index it by character value.
//   - __NATIVE_ASCII_F defined: the `upper` member of the __lc_ctype_a object.
//   - otherwise: the region of __ctype starting at __TOUPPER_INDEX.
// NOTE(review): both sources come from the z/OS system headers; their exact
// element types are not visible here, hence the const_cast is left untouched.
const unsigned short *
ctype<char>::__classic_upper_table() _NOEXCEPT
{
# if defined(__NATIVE_ASCII_F)
return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->upper);
# else
return const_cast<const unsigned short*>(__ctype + __TOUPPER_INDEX);
# endif
}
#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__ || __MVS__
// template <> class ctype_byname<char>

View file

@ -76,6 +76,125 @@ struct collationnames
char char_;
};
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
// EBCDIC IBM-1047
// Sorted via the EBCDIC collating sequence
//
// Maps each collating-element name to its single-character code point in the
// IBM-1047 code page. Because the names are ordered by their EBCDIC byte
// values, entries beginning with a lower-case letter (0x81..) come before
// those beginning with an upper-case letter (0xC1..), which is the reverse of
// the ASCII table.
// NOTE(review): the ordering presumably has to be kept for an ordered lookup
// elsewhere in this file; insert new entries in EBCDIC order, not ASCII order.
const collationnames collatenames[] =
{
{"a", 0x81},
{"alert", 0x2f},
{"ampersand", 0x50},
{"apostrophe", 0x7d},
{"asterisk", 0x5c},
{"b", 0x82},
{"backslash", 0xe0},
{"backspace", 0x16},
{"c", 0x83},
{"carriage-return", 0xd},
{"circumflex", 0x5f},
{"circumflex-accent", 0x5f},
{"colon", 0x7a},
{"comma", 0x6b},
{"commercial-at", 0x7c},
{"d", 0x84},
{"dollar-sign", 0x5b},
{"e", 0x85},
{"eight", 0xf8},
{"equals-sign", 0x7e},
{"exclamation-mark", 0x5a},
{"f", 0x86},
{"five", 0xf5},
{"form-feed", 0xc},
{"four", 0xf4},
{"full-stop", 0x4b},
{"g", 0x87},
{"grave-accent", 0x79},
{"greater-than-sign", 0x6e},
{"h", 0x88},
{"hyphen", 0x60},
{"hyphen-minus", 0x60},
{"i", 0x89},
{"j", 0x91},
{"k", 0x92},
{"l", 0x93},
{"left-brace", 0xc0},
{"left-curly-bracket", 0xc0},
{"left-parenthesis", 0x4d},
{"left-square-bracket", 0xad},
{"less-than-sign", 0x4c},
{"low-line", 0x6d},
{"m", 0x94},
{"n", 0x95},
{"newline", 0x15},
{"nine", 0xf9},
{"number-sign", 0x7b},
{"o", 0x96},
{"one", 0xf1},
{"p", 0x97},
{"percent-sign", 0x6c},
{"period", 0x4b},
{"plus-sign", 0x4e},
{"q", 0x98},
{"question-mark", 0x6f},
{"quotation-mark", 0x7f},
{"r", 0x99},
{"reverse-solidus", 0xe0},
{"right-brace", 0xd0},
{"right-curly-bracket", 0xd0},
{"right-parenthesis", 0x5d},
{"right-square-bracket", 0xbd},
{"s", 0xa2},
{"semicolon", 0x5e},
{"seven", 0xf7},
{"six", 0xf6},
{"slash", 0x61},
{"solidus", 0x61},
{"space", 0x40},
{"t", 0xa3},
{"tab", 0x5},
{"three", 0xf3},
{"tilde", 0xa1},
{"two", 0xf2},
{"u", 0xa4},
{"underscore", 0x6d},
{"v", 0xa5},
{"vertical-line", 0x4f},
{"vertical-tab", 0xb},
{"w", 0xa6},
{"x", 0xa7},
{"y", 0xa8},
{"z", 0xa9},
{"zero", 0xf0},
{"A", 0xc1},
{"B", 0xc2},
{"C", 0xc3},
{"D", 0xc4},
{"E", 0xc5},
{"F", 0xc6},
{"G", 0xc7},
{"H", 0xc8},
{"I", 0xc9},
{"J", 0xd1},
{"K", 0xd2},
{"L", 0xd3},
{"M", 0xd4},
{"N", 0xd5},
{"NUL", 0},
{"O", 0xd6},
{"P", 0xd7},
{"Q", 0xd8},
{"R", 0xd9},
{"S", 0xe2},
{"T", 0xe3},
{"U", 0xe4},
{"V", 0xe5},
{"W", 0xe6},
{"X", 0xe7},
{"Y", 0xe8},
{"Z", 0xe9}
};
#else
// ASCII
const collationnames collatenames[] =
{
{"A", 0x41},
@ -190,6 +309,7 @@ const collationnames collatenames[] =
{"z", 0x7a},
{"zero", 0x30}
};
#endif
struct classnames
{

View file

@ -267,6 +267,9 @@ include(HandleLibcxxabiFlags)
#===============================================================================
# Configure target flags
# z/OS only: request the EBCDIC char mode from the compiler. The flag goes
# through add_target_flags_if_supported, so it is presumably dropped when the
# compiler does not accept it (confirm against HandleLibcxxabiFlags).
if(ZOS)
add_target_flags_if_supported("-fzos-le-char-mode=ebcdic")
endif()
if(LIBCXXABI_TARGET_TRIPLE)
add_target_flags_if_supported("--target=${LIBCXXABI_TARGET_TRIPLE}")
endif()