# HG changeset patch
# Parent  4587a24030e347df0461be19ec3b5814bf3993f5
Fix MinGW-Issue #39687; reimplement wcrtomb() and wcsrtombs().

* include/wchar.h [__MSVCRT_VERSION__ < __MSVCR80_DLL]
(wcrtomb, wcsrtombs): Implement them as static inline redirects to...
(__mingw_wcrtomb, __mingw_wcsrtombs): ...these; declare them.

* include/limits.h (MB_LEN_MAX): Update value; was 2, but should be 5.

* mingwex/wcsrtombs.c: New file; it implements...
(__mingw_wcsrtombs): ...this new function, which replaces...
(wcsrtombs): ...this; it was originally implemented...
* mingwex/wcrtomb.c: ...here; rewritten as new, it now implements...
(__mingw_wcrtomb): ...only this new function, which replaces...
(wcrtomb): ...this.

* mingwex/wcharmap.h: New private header; it declares the API for...
* mingwex/wcharmap.c: ...this new file, which implements...
(__mingw_wchar_to_mbcs_map): ...this new function, required by...
(__mingw_wcrtomb, __mingw_wcsrtombs): ...both of these.

* mingwex/codeset.c: New file; it implements...
(__mb_codeset_for_locale, __mb_len_max_for_codeset): ...this pair of
new helper functions; respectively, they identify the codeset for the
effective process locale, and its MB_CUR_MAX, which are required by...
(__mingw_wchar_to_mbcs_map): ...this.

* Makefile.in (libmingwex.a): Add dependency references for...
(codeset.$OBJEXT, wcharmap.$OBJEXT, wcsrtombs.$OBJEXT): ...these.

* msvcrt.def.in (wcrtomb, wcsrtombs): Require dlsym look-up for
MSVCRT.DLL entry point addresses.

diff --git a/mingwrt/Makefile.in b/mingwrt/Makefile.in
--- a/mingwrt/Makefile.in
+++ b/mingwrt/Makefile.in
@@ -466,13 +466,13 @@ libmingwex.a: $(addsuffix .$(OBJEXT), di
 libmingwex.a: $(addsuffix .$(OBJEXT), mkstemp mkdtemp cryptnam setenv)
 libmingwex.a: $(addsuffix .$(OBJEXT), getdelim gettimeofday)
 
 vpath %.s ${mingwrt_srcdir}/mingwex
 vpath %.sx ${mingwrt_srcdir}/mingwex
-libmingwex.a: $(addsuffix .$(OBJEXT), fwide mbrtowc mbsinit strnlen wcrtomb \
-  wcsnlen wcstof wcstold wctob wctrans wctype wmemchr wmemcmp wmemcpy wmemmove \
-  wmemset)
+libmingwex.a: $(addsuffix .$(OBJEXT), codeset fwide mbrtowc mbsinit strnlen \
+  wcharmap wcrtomb wcsrtombs wcsnlen wcstof wcstold wctob wctrans wctype wmemchr \
+  wmemcmp wmemcpy wmemmove wmemset)
 
 # The wcsnlen() function, enumerated above, is an adaptation of strnlen();
 # we need a specific rule to compile it, from shared source.
 #
 wcsnlen.$(OBJEXT): strnlen.sx
diff --git a/mingwrt/include/limits.h b/mingwrt/include/limits.h
--- a/mingwrt/include/limits.h
+++ b/mingwrt/include/limits.h
@@ -4,11 +4,11 @@
  * Manifest constants defining the sizes of integral types.
  *
  * $Id$
  *
  * Written by Colin Peters <colin@bird.fu.is.saga-u.ac.jp>
- * Copyright (C) 1997, 1999-2001, 2004, 2005, 2010, 2012, 2017,
+ * Copyright (C) 1997, 1999-2001, 2004, 2005, 2010, 2012, 2017, 2019,
  *   MinGW.org Project
  *
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -51,15 +51,17 @@
 # define PATH_MAX	260
 #endif
 
 /* Characteristics of the char data type.
  *
- * FIXME: Is MB_LEN_MAX correct?  Probably yes, for Microsoft MBCS, which
- * effectively seem to all be DBCS.
+ * FIXME: Is MB_LEN_MAX correct?  Earlier Microsoft documentation specified
+ * it as two, (which would probably have been okay, in the case of only DBCS
+ * encodings); today (2019), Microsoft's documentation says that five is the
+ * appropriate value.
  */
 #define CHAR_BIT	8
-#define MB_LEN_MAX	2
+#define MB_LEN_MAX	5
 
 #define SCHAR_MIN	(-128)
 #define SCHAR_MAX	127
 
 #define UCHAR_MAX	255
diff --git a/mingwrt/include/wchar.h b/mingwrt/include/wchar.h
--- a/mingwrt/include/wchar.h
+++ b/mingwrt/include/wchar.h
@@ -6,11 +6,12 @@
  *
  * $Id$
  *
  * Unattributed original source.
  * Adapted by Rob Savoye <rob@cygnus.com>
- * Copyright (C) 1997, 1999-2009, 2011, 2015, 2016, 2018, MinGW.org Project.
+ * Copyright (C) 1997, 1999-2009, 2011, 2015, 2016, 2018, 2019,
+ *   MinGW.org Project.
  *
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
@@ -526,32 +527,63 @@ extern size_t __mingw_wcsnlen (const wch
 typedef wchar_t  _Wint_t;
 #endif
 
 typedef int mbstate_t;
 
-/* The following multi-byte character conversion functions are
- * implemented in libmingwex.a, (and maybe also in some non-free
- * Microsoft libraries, such as MSVCP60.DLL and later).
+/* The following multi-byte character conversion functions have been
+ * implemented by Microsoft, in non-free MSVCR80.DLL and later, (and
+ * maybe also in some earlier non-free DLLs, such as MSVCP60.DLL and
+ * later); they are also available in MSVCRT.DLL, from Vista onward,
+ * but to provide continuing support for earlier Windows versions,
+ * we invoke them via MinGW specific wrappers, defined below.
  */
 __cdecl __MINGW_NOTHROW  wint_t btowc (int);
 __cdecl __MINGW_NOTHROW  int wctob (wint_t);
 
-__cdecl __MINGW_NOTHROW
-size_t mbrlen (const char *__restrict__, size_t, mbstate_t *__restrict__);
+__cdecl __MINGW_NOTHROW  size_t mbrlen
+(const char *__restrict__, size_t, mbstate_t *__restrict__);
 
 __cdecl __MINGW_NOTHROW  size_t mbrtowc
 (wchar_t *__restrict__, const char *__restrict__, size_t, mbstate_t *__restrict__);
 
 __cdecl __MINGW_NOTHROW  size_t mbsrtowcs
 (wchar_t *__restrict__, const char **__restrict__, size_t, mbstate_t *__restrict__);
 
-__cdecl __MINGW_NOTHROW
-size_t wcrtomb (char * __restrict__, wchar_t, mbstate_t *__restrict__);
+__cdecl __MINGW_NOTHROW  size_t wcrtomb
+(char * __restrict__, wchar_t, mbstate_t *__restrict__);
 
 __cdecl __MINGW_NOTHROW  size_t wcsrtombs
 (char *__restrict__, const wchar_t **__restrict__, size_t, mbstate_t *__restrict__);
 
+/* To provide support for the above, on legacy Windows versions,
+ * we implement fall back wrappers in libmingwex.a; each of these
+ * will delegate to the corresponding Microsoft implementation, if
+ * it exists in the process address space; otherwise, execution
+ * will fall back to a MinGW implementation.
+ */
+__cdecl __MINGW_NOTHROW  size_t __mingw_wcrtomb
+(char * __restrict__, wchar_t, mbstate_t *__restrict__);
+
+__cdecl __MINGW_NOTHROW  size_t __mingw_wcsrtombs
+(char *__restrict__, const wchar_t **__restrict__, size_t, mbstate_t *__restrict__);
+
+#if __MSVCRT_VERSION__ < __MSVCR80_DLL
+/* For linking with all versions of MSVCRT.DLL, and with non-free
+ * alternatives predating MSVCR80.DLL, we enforce inline mapping to
+ * the libmingwex.a implementations, (which will delegate the calls
+ * to the Microsoft DLL implementations, when they are available).
+ */
+__CRT_ALIAS __cdecl __MINGW_NOTHROW  size_t wcrtomb
+(char * __mbc, wchar_t __wc, mbstate_t *__ps)
+{ return __mingw_wcrtomb(__mbc, __wc, __ps); }
+
+__CRT_ALIAS __cdecl __MINGW_NOTHROW  size_t wcsrtombs
+(char *__mbs, const wchar_t **__wcs, size_t __len, mbstate_t *__ps)
+{ return __mingw_wcsrtombs(__mbs, __wcs, __len, __ps); }
+
+#endif	/* ! MSVCR80.DLL or later */
+
 #if defined _ISOC99_SOURCE || defined __cplusplus
 /* These ISO-C99 functions are implemented in libmingwex.a,
  * or, in some cases, as inline stubs; while provided as MinGW
  * extensions to support ISO-C99, they are also required by
  * GNU C++.
diff --git a/mingwrt/mingwex/codeset.c b/mingwrt/mingwex/codeset.c
new file mode 100644
--- /dev/null
+++ b/mingwrt/mingwex/codeset.c
@@ -0,0 +1,111 @@
+/*
+ * codeset.c
+ *
+ * Provides implementation-private helper functions, to identify the
+ * code page which is associated with the active process locale, and to
+ * establish the effective MB_CUR_MAX value for this code page.
+ *
+ * $Id$
+ *
+ * Written by Keith Marshall <keith@users.osdn.me>
+ * Copyright (C) 2019, MinGW.org Project
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice, this permission notice, and the following
+ * disclaimer shall be included in all copies or substantial portions of
+ * the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OF OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <winnls.h>
+
+unsigned int __mb_codeset_for_locale( void );
+unsigned int __mb_len_max_for_codeset( unsigned int );
+
+unsigned int __mb_codeset_for_locale( void )
+{
+  /* Extract the code page identification string (if any) from the LC_CTYPE
+   * identification string, as returned in "language[_region[.codeset]]", or
+   * ".codeset" format, by a setlocale() query on the current locale.
+   */
+  char *default_locale_specification, *codeset_string;
+  if( (default_locale_specification = setlocale( LC_CTYPE, NULL )) != NULL )
+  {
+    /* An unfortunate -- albeit documented -- limitation of Microsoft's
+     * setlocale() implementation is that it cannot correctly process any
+     * locale specification which refers to a MBCS codeset which may use
+     * more than two bytes for any single code point; to mitigate this,
+     * when the active locale matches the system default...
+     */
+    char string_buffer[1 + strlen( default_locale_specification )];
+    codeset_string = strcpy( string_buffer, default_locale_specification );
+    if( strcmp( codeset_string, setlocale( LC_CTYPE, "" )) == 0 )
+    {
+      /* ...although Microsoft's setlocale() doesn't support it, (and
+       * is neither expected to, nor required to), we may adopt POSIX.1
+       * convention, in this particular case, to acquire a preferred
+       * default locale specification from the environment...
+       */
+      if( ((default_locale_specification = getenv( "LC_ALL" )) != NULL)
+       || ((default_locale_specification = getenv( "LC_CTYPE" )) != NULL)
+       || ((default_locale_specification = getenv( "LANG" )) != NULL)     )
+
+	/* ...and use that in place of Microsoft's setlocale() notion
+	 * of the current effective LC_CTYPE locale category.
+	 */
+	codeset_string = default_locale_specification;
+    }
+    else
+    { /* The originally active locale does NOT match the system default,
+       * but we made it do so, by checking, so restore the original.
+       */
+      setlocale( LC_CTYPE, codeset_string );
+    }
+    /* Regardless of how we established the effective LC_CTYPE category
+     * for the active locale, we may extract its codeset element...
+     */
+    if( (codeset_string = strchr( codeset_string, '.' )) != NULL )
+    {
+      /* ...interpreting the resultant string as its equivalent integer
+       * value, for validation and return.
+       */
+      unsigned int retval = (unsigned int)(atoi( codeset_string + 1 ));
+      if( __mb_len_max_for_codeset( retval ) > 0 ) return retval;
+    }
+  }
+  /* In the event that LC_CTYPE doesn't include a codeset identification,
+   * or that the codeset fails validation, return an effective value of
+   * zero, which callers may interpret as representing the "C" locale.
+   */
+  return 0;
+}
+
+unsigned int __mb_len_max_for_codeset( unsigned int codeset )
+{
+  /* Identify the length of the longest valid multibyte character encoding
+   * sequence, used within the specified MS-Windows code page, by querying
+   * GetCPInfo() for its MaxCharSize.  Returns that byte count, or zero if
+   * GetCPInfo() fails, (e.g. when the codeset identifier is not valid).
+   */
+  CPINFO codeset_info;
+  return (GetCPInfo( codeset, &codeset_info )) ? codeset_info.MaxCharSize : 0;
+}
+
+/* $RCSfile$: end of file */
diff --git a/mingwrt/mingwex/wcharmap.c b/mingwrt/mingwex/wcharmap.c
new file mode 100644
--- /dev/null
+++ b/mingwrt/mingwex/wcharmap.c
@@ -0,0 +1,172 @@
+/*
+ * wcharmap.c
+ *
+ * Provides an implementation-private helper function, to facilitate
+ * conversion from UTF-16LE wchar_t data, of arbitrary length, to an
+ * equivalent multi-byte character encoding sequence.
+ *
+ * $Id$
+ *
+ * Written by Keith Marshall <keith@users.osdn.me>
+ * Copyright (C) 2019, MinGW.org Project
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice, this permission notice, and the following
+ * disclaimer shall be included in all copies or substantial portions of
+ * the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OF OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "wcharmap.h"
+
+#include <limits.h>
+
+size_t __mingw_wchar_to_mbcs_map
+( unsigned cp, char *mbs, int mblen, const wchar_t *wcs, int wclen )
+{
+  /* Helper function to map a sequence of wchars to their corresponding
+   * sequence of multibyte characters, encoded as is appropriate for the
+   * specified code page, (which is nominally the code page associated
+   * with the current locale).
+   *
+   * Inputs:
+   *   cp	The code page for which encoding is to be performed.
+   *
+   *   mbs	Buffer in which the encoded multibyte sequence may be
+   *    	returned, or NULL, if only the sequence length is to
+   *    	be determined, discarding the encoded data.
+   *
+   *   mblen	Number of bytes available in mbs; ignored if mbs is
+   *    	passed as NULL.
+   *
+   *   wcs	The sequence of wchars which is to be encoded.
+   *
+   *   wclen	The number of wchars in wcs; if passed as (size_t)(-1),
+   *    	scan until (wchar_t)(0), or until a wchar with no valid
+   *    	encoding, or space in the encoding buffer is exhausted.
+   *
+   * Returns:
+   *   The number of encoded bytes (which would be) stored into mbs, if
+   *   mbs is not NULL, and all specified wchars in wcs are successfully
+   *   encoded; otherwise, returns (size_t)(-1), and sets errno to:
+   *
+   *   EILSEQ	If encoding is interrupted by a wchar with no valid
+   *    	encoding within the specified code page.
+   *
+   *   ENOMEM	The mbs pointer isn't NULL, but there is insufficient
+   *    	space in the designated buffer; (code page zero only:
+   *    	any WideCharToMultiByte() failure reports EILSEQ).
+   */
+  size_t retval; int eilseq_flag = 0;
+
+  if( cp == 0 )
+  { /* Code page zero is assumed to represent the encoding which applies
+     * within the "C" locale; this is a single-byte encoding, with wchar
+     * values in the range 0..UCHAR_MAX mapped to their identical byte
+     * values, and all greater wchar values considered to be invalid.
+     *
+     * Simply scan, count, and optionally store valid byte values,
+     * starting from an initial count of zero.
+     */
+    retval = 0;
+
+    if( (size_t)(wclen) == (size_t)(-1) )
+      do { /* This is an unbounded scan; simply check that each
+	    * successive wchar lies in the valid range...
+	    */
+	   if( (unsigned)(*wcs) > UCHAR_MAX )
+	     /* ...otherwise, report an invalid encoding, and
+	      * bail out.
+	      */
+	     return errout( EILSEQ, wclen );
+
+	   /* We got a valid input wchar...
+	    */
+	   if( mbs != NULL )
+	   { /* ...which we are now expected to store...
+	      */
+	     if( mblen-- > 0 ) *mbs++ = (unsigned char)(*wcs);
+
+	     /* ...but, we must bail out, if there is no
+	      * space left in the encoding buffer.
+	      */
+	     else return errout( ENOMEM, (size_t)(-1) );
+	   }
+
+	   /* We've accepted the current input wchar; count
+	    * it, and then, provided it isn't the terminating
+	    * NUL, move on to the next.
+	    */
+	   ++retval;
+	 } while( *wcs++ != L'\0' );
+
+    else while( wclen-- > 0 )
+    { /* This is a bounded scan; as in the unbounded case, take
+       * each input wchar in turn, and verify that each lies in
+       * the valid encoding range.
+       */
+      if( (unsigned)(*wcs) > UCHAR_MAX )
+	return errout( EILSEQ, (size_t)(-1) );
+
+      /* We got a valid input wchar...
+       */
+      if( mbs != NULL )
+      { /* ...which we are now expected to store...
+	 */
+	if( mblen-- > 0 ) *mbs++ = (unsigned char)(*wcs);
+
+	/* ...but, we must bail out, if there is no
+	 * space left in the encoding buffer.
+	 */
+	else return errout( ENOMEM, (size_t)(-1) );
+      }
+
+      /* Ensure that we don't scan beyond a terminating NUL
+       * wchar, even if this lies within the bounded count.
+       */
+      if( *wcs++ == L'\0' ) wclen = 0;
+
+      /* In any case, count the current encoded byte.
+       */
+      ++retval;
+    }
+
+    /* We now have the final count, for a code page zero encoding;
+     * we are done.
+     */
+    return retval;
+  }
+
+  /* For any code page other than zero, we delegate both encoding
+   * and byte counting to the Windows API; note that for code pages
+   * other than CP_UTF7 or CP_UTF8, (and CP_UTF8 is the only code
+   * page with an identifier greater than that for CP_UTF7), there
+   * may be unrepresentable UTF-16 code points, and we must pass a
+   * flag reference to detect their presence in the UTF-16LE input
+   * sequence; OTOH, any valid UTF-16 code point is representable
+   * in both CP_UTF7 and CP_UTF8, so no such flag is required, and
+   * WideCharToMultiByte() will choke, if the flag reference is
+   * not passed as NULL.
+   */
+  retval = WideCharToMultiByte( cp, 0, wcs, wclen, mbs, mblen, NULL,
+      (CP_UTF7 > cp) ? &eilseq_flag : NULL
+    );
+  return (eilseq_flag || (retval == 0)) ? errout( EILSEQ, (size_t)(-1) )
+    : retval;
+}
+
+/* $RCSfile$: end of file */
diff --git a/mingwrt/mingwex/wcharmap.h b/mingwrt/mingwex/wcharmap.h
new file mode 100644
--- /dev/null
+++ b/mingwrt/mingwex/wcharmap.h
@@ -0,0 +1,62 @@
+/*
+ * wcharmap.h
+ *
+ * Private header file, declaring common components of the MinGW.org
+ * fallback implementations of wide to multi-byte (and complementary)
+ * character set conversion API functions.
+ *
+ * $Id$
+ *
+ * Written by Keith Marshall <keith@users.osdn.me>
+ * Copyright (C) 2019, MinGW.org Project
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice, this permission notice, and the following
+ * disclaimer shall be included in all copies or substantial portions of
+ * the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OF OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <wchar.h>
+#include <winnls.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/* Define a pair of inline helper functions, to facilitate preservation
+ * of the "errno" state on entry, such that it may be restored or modified,
+ * as necessary for ISO-C99 conformance, on function return.
+ *
+ * First, a helper to save, and clear, error state on entry; it assigns
+ * "clear" to errno, and returns "state", (presumably the entry errno)...
+ */
+static __inline__ __attribute__((__always_inline__))
+int save_error_status_and_clear (int state, int clear)
+{ errno = clear; return state; }
+
+/* ...and the complementary helper, to restore saved state or report a new
+ * error on return; it assigns "errcode" to errno, and returns "status".
+ */
+static __inline__ __attribute__((__always_inline__))
+size_t errout (int errcode, size_t status){ errno = errcode; return status; }
+
+unsigned int __mb_codeset_for_locale (void);
+unsigned int __mb_len_max_for_codeset (unsigned int);
+size_t __mingw_wchar_to_mbcs_map (unsigned, char *, int, const wchar_t *, int);
+/* Local shorthand; simply forwards to __mb_codeset_for_locale(). */
+static __inline__ __attribute__((__always_inline__))
+unsigned int get_codepage(){ return __mb_codeset_for_locale(); }
+
+/* $RCSfile$: end of file */
diff --git a/mingwrt/mingwex/wcrtomb.c b/mingwrt/mingwex/wcrtomb.c
--- a/mingwrt/mingwex/wcrtomb.c
+++ b/mingwrt/mingwex/wcrtomb.c
@@ -1,94 +1,113 @@
-#include "mb_wc_common.h"
-#include <wchar.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <limits.h>
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
+/*
+ * wcrtomb.c
+ *
+ * MinGW.org replacement for the wcrtomb() function; delegates to the
+ * Microsoft implementation, if available in the C runtime DLL, otherwise
+ * handles the call locally.
+ *
+ * $Id$
+ *
+ * Written by Keith Marshall <keith@users.osdn.me>
+ * Copyright (C) 2019, MinGW.org Project
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice, this permission notice, and the following
+ * disclaimer shall be included in all copies or substantial portions of
+ * the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OF OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "wcharmap.h"
 
+/* For runtime delegation, we need a mechanism for detection of an
+ * implementation, within the default C runtime DLL; we may use the
+ * MinGW dlfcn emulation, to facilitate this.
+ */
+#include <dlfcn.h>
 
-static int __MINGW_ATTRIB_NONNULL(1)
- __wcrtomb_cp (char *dst, wchar_t wc, const unsigned int cp,
-	       const unsigned int mb_max)
+/* We need to look up the effective working codeset, before choosing
+ * between MSVCRT.DLL and MinGW fallback implementations; to avoid a
+ * need to look it up again, within the MinGW fallback, we store the
+ * result of the initial look up in this file-global variable.
+ */
+static unsigned int codeset;
+
+static size_t __mingw_wcrtomb_fallback
+( char *restrict mb, wchar_t wc, mbstate_t *__UNUSED_PARAM(ps) )
+# define mbcs_map __mingw_wchar_to_mbcs_map
 {
-  if (cp == 0)
-    {
-      if (wc > 255)
-	{
-	  errno = EILSEQ;
-	  return -1;
-	}
-      *dst = (char) wc;
-      return 1;
-    }
-  else
-    {
-      int invalid_char = 0;
+  /* Fallback function, providing an implementation of the wcrtomb()
+   * function, when none is available within the Microsoft runtime.
+   *
+   * When mb is a NULL pointer, ISO-C99 decrees that the call shall
+   * be interpreted as the equivalent of:
+   *
+   *   wcrtomb( internal_buffer, L'\0', ps );
+   *
+   * with the encoding of the NUL wchar, preceded by any sequence
+   * of bytes needed to restore ps to the initial shift state, being
+   * stored in the internal buffer, (and thus, inaccessible to the
+   * caller).  Since Microsoft's MBCS encodings do not use shift
+   * states, and the encoding for NUL is always a single NUL byte,
+   * this becomes the equivalent of returning (size_t)(1).
+   */
+  if( mb == NULL ) return (size_t)(1);
 
-      int size = WideCharToMultiByte (cp, 0 /* Is this correct flag? */,
-				      &wc, 1, dst, mb_max,
-				      NULL, &invalid_char);
-      if (size == 0 || invalid_char)
-        {
-          errno = EILSEQ;
-          return -1;
-        }
-      return size;
-    }
+  /* Otherwise, we return the byte count, and effect of encoding
+   * the single wchar passed by value in wc.
+   */
+  return mbcs_map( codeset, mb, MB_CUR_MAX, &wc, 1 );
 }
 
-size_t
-wcrtomb (char *dst, wchar_t wc, mbstate_t * __UNUSED_PARAM (ps))
+size_t __mingw_wcrtomb( char *restrict mb, wchar_t wc, mbstate_t *restrict ps )
 {
-  char byte_bucket [MB_LEN_MAX];
-  char* tmp_dst = dst ? dst : byte_bucket;
-  return (size_t)__wcrtomb_cp (tmp_dst, wc, get_codepage (),
-			       MB_CUR_MAX);
+  /* Wrapper for the wcrtomb() function; it will initially attempt
+   * to delegate the call to a Microsoft-provided implementation, but
+   * if no such implementation can be found, fall back to the MinGW
+   * substitute (defined above).
+   */
+  typedef size_t (*redirect_t)( char *restrict, wchar_t, mbstate_t *restrict );
+  static redirect_t redirector_hook = NULL;
+
+  /* MSVCRT.DLL's setlocale() cannot reliably handle code pages with
+   * more than two bytes per code point, (e.g. UTF-7 and UTF-8); thus,
+   * Microsoft's wcrtomb() is likely to be similarly unreliable, so
+   * always use the MinGW fallback with such code pages.
+   */
+  if( __mb_len_max_for_codeset( codeset = __mb_codeset_for_locale() ) > 2 )
+    return __mingw_wcrtomb_fallback( mb, wc, ps );
+
+  /* On first time call, we don't know which implementation is to be
+   * selected; look for a Microsoft implementation, which, if available,
+   * may be registered for immediate use on this, and any subsequent,
+   * calls to this function wrapper...
+   */
+  if(  (redirector_hook == NULL)
+  &&  ((redirector_hook = dlsym( RTLD_DEFAULT, "wcrtomb" )) == NULL)  )
+
+    /* ...but when no Microsoft implementation can be found, register
+     * the MinGW fall back in its stead.
+     */
+    redirector_hook = __mingw_wcrtomb_fallback;
+
+  /* Finally, delegate the call to whichever implementation has been
+   * registered on first-time call.
+   */
+  return redirector_hook( mb, wc, ps );
 }
 
-size_t wcsrtombs (char *dst, const wchar_t **src, size_t len,
-		  mbstate_t * __UNUSED_PARAM (ps))
-{
-  int ret = 0;
-  size_t n = 0;
-  const unsigned int cp = get_codepage();
-  const unsigned int mb_max = MB_CUR_MAX;
-  const wchar_t *pwc = *src;
-
-  if (src == NULL || *src == NULL) /* undefined behavior */
-     return 0;
-
-  if (dst != NULL)
-    {
-       while (n < len)
-        {
-          if ((ret = __wcrtomb_cp (dst, *pwc, cp, mb_max)) <= 0)
-	     return (size_t) -1;
-  	  n += ret;
-   	  dst += ret;
-          if (*(dst - 1) == '\0')
-	    {
-	      *src = (wchar_t*) NULL;;
-	      return (n  - 1);
-	    }
-	  pwc++;
-        }
-      *src = pwc;
-    }
-  else
-    {
-      char byte_bucket [MB_LEN_MAX];
-      while (n < len)
-        {
-	  if ((ret = __wcrtomb_cp (byte_bucket, *pwc, cp, mb_max))
-		 <= 0)
- 	    return (size_t) -1;
-	  n += ret;
-	  if (byte_bucket [ret - 1] == '\0')
-	    return (n - 1);
-          pwc++;
-        }
-    }
-
-  return n;
-}
+/* $RCSfile$: end of file */
diff --git a/mingwrt/mingwex/wcsrtombs.c b/mingwrt/mingwex/wcsrtombs.c
new file mode 100644
--- /dev/null
+++ b/mingwrt/mingwex/wcsrtombs.c
@@ -0,0 +1,167 @@
+/*
+ * wcsrtombs.c
+ *
+ * MinGW.org replacement for the wcsrtombs() function; delegates to the
+ * Microsoft implementation, if available in the C runtime DLL, otherwise
+ * handles the call locally.
+ *
+ * $Id$
+ *
+ * Written by Keith Marshall <keith@users.osdn.me>
+ * Copyright (C) 2019, MinGW.org Project
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice, this permission notice, and the following
+ * disclaimer shall be included in all copies or substantial portions of
+ * the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OF OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "wcharmap.h"
+
+/* For runtime delegation, we need a mechanism for detection of an
+ * implementation, within the default C runtime DLL; we may use the
+ * MinGW dlfcn emulation, to facilitate this.
+ */
+#include <dlfcn.h>
+
+/* We need to look up the effective working codeset, before choosing
+ * between MSVCRT.DLL and MinGW fallback implementations; to avoid a
+ * need to look it up again, within the MinGW fallback, we store the
+ * result of the initial look up in this file-global variable.
+ */
+static unsigned int codeset;
+
+static size_t __mingw_wcsrtombs_fallback
+( char *restrict mbs, const wchar_t **restrict wcs, size_t len, mbstate_t *__UNUSED_PARAM(ps) )
+# define mbcs_map __mingw_wchar_to_mbcs_map
+{
+  /* Fallback function, providing an implementation of the wcsrtombs()
+   * function, when none is available within the Microsoft runtime.
+   *
+   * The code page for which the multibyte encoding is required is read
+   * from the file-global codeset; save the current errno state, so that
+   * we may restore it on return, clear it to zero for internal checking,
+   * and compute the size of buffer required for the whole conversion.
+   */
+  int errno_reset = save_error_status_and_clear( errno, 0 );
+  size_t wanted = mbcs_map( codeset, NULL, 0, *wcs, -1 );
+
+  if( mbs == NULL )
+    /* There is no buffer designated to store the encoded multibyte
+     * character sequence; only the required size is of interest, and
+     * we already have it.  Return it less one, (presumably to discount
+     * the NUL terminator), or unadjusted if errno has been set.
+     */
+    return (errno == 0) ? errout( errno_reset, wanted - 1 ) : wanted;
+
+  if( (errno == 0) && (len >= wanted) )
+  { /* There is an encoding buffer designated, its size is sufficient
+     * to accommodate the encoding of the entire NUL terminated input
+     * sequence, and there was no incipient encoding error during the
+     * initial minimum buffer size determination; encode the entire
+     * input sequence for return, and clean up the input state.
+     */
+    len = mbcs_map( codeset, mbs, len, *wcs, -1 ) - 1;
+    *wcs = NULL;
+  }
+
+  else
+  { /* There is an encoding buffer designated, but either it is too
+     * small, or an incipient encoding error has been detected; rescan
+     * the input sequence, encoding one code point at a time, until we
+     * either exhaust the encoding buffer space, or we encounter the
+     * encoding error previously identified.
+     */
+    size_t count = 0; errno = 0;
+    while( (len >= mbcs_map( codeset, NULL, 0, *wcs, 1 )) && (errno == 0) )
+    {
+      /* There is still sufficient space to store the encoding of one
+       * more input code point, and we haven't yet fallen foul of any
+       * incipient encoding error; store this encoding, then advance
+       * the input pointer, and the output position, for the next.
+       */
+      size_t step = mbcs_map( codeset, mbs, len, (*wcs)++, 1 );
+      count += step; len -= step; mbs += step;
+    }
+
+    /* Check that we didn't fall foul of any incipient encoding error;
+     * if we did, bail out, leaving errno as set by the failed mapping.
+     */
+    if( errno != 0 ) return (size_t)(-1);
+
+    /* If we're still here, then we've encoded as much of the input
+     * sequence as we can accommodate; the input pointer has already
+     * been adjusted, as required, but we must preserve the count of
+     * cumulatively encoded bytes, for return.
+     */
+    len = count;
+  }
+
+  /* We have now successfully encoded as much of the input sequence
+   * as possible, without encountering any encoding error; restore
+   * the saved errno state, and return the encoded byte count.
+   */
+  return errout( errno_reset, len );
+}
+
+size_t __mingw_wcsrtombs
+( char *mbs, const wchar_t **wcs, size_t len, mbstate_t *ps )
+{
+  /* Wrapper for the wcsrtombs() function; it will initially attempt
+   * to delegate the call to a Microsoft-provided implementation, but
+   * if no such implementation can be found, fall back to the MinGW
+   * substitute (defined above).
+   */
+  typedef size_t (*redirect_t)(char *, const wchar_t **, size_t, mbstate_t *);
+  static redirect_t redirector_hook = NULL;
+
+  /* Neither wcs, nor the pointer to which it refers, may be NULL.
+   * ISO C doesn't specify any particular outcome for this condition,
+   * (so a segmentation fault would conform); it makes more sense to
+   * catch the abnormality, and bail out.
+   */
+  if( (wcs == NULL) || (*wcs == NULL) ) return errout( EINVAL, (size_t)(-1) );
+
+  /* MSVCRT.DLL's setlocale() cannot reliably handle code pages with
+   * more than two bytes per code point, (e.g. UTF-7 and UTF-8); thus,
+   * Microsoft's wcsrtombs() is likely to be similarly unreliable, so
+   * always use the MinGW fallback with such code pages; (note that
+   * the file-global codeset is refreshed here, on every call).
+   */
+  if( __mb_len_max_for_codeset( codeset = __mb_codeset_for_locale() ) > 2 )
+    return __mingw_wcsrtombs_fallback( mbs, wcs, len, ps );
+
+  /* On first time call, we don't know which implementation is to be
+   * selected; look for a Microsoft implementation, which, if available,
+   * may be registered for immediate use on this, and any subsequent,
+   * calls to this function wrapper...
+   */
+  if(  (redirector_hook == NULL)
+  &&  ((redirector_hook = dlsym( RTLD_DEFAULT, "wcsrtombs" )) == NULL)  )
+  {
+    /* ...but when no Microsoft implementation can be found, register
+     * the MinGW fallback in its stead.
+     */
+    redirector_hook = __mingw_wcsrtombs_fallback;
+  }
+  /* Finally, delegate the call to whichever implementation has been
+   * registered on first-time call.
+   */
+  return redirector_hook( mbs, wcs, len, ps );
+}
+
+/* $RCSfile$: end of file */
diff --git a/mingwrt/msvcrt-xref/msvcrt.def.in b/mingwrt/msvcrt-xref/msvcrt.def.in
--- a/mingwrt/msvcrt-xref/msvcrt.def.in
+++ b/mingwrt/msvcrt-xref/msvcrt.def.in
@@ -3563,11 +3563,11 @@ vwprintf
 vwprintf_s
 #  if __MSVCRT_VERSION__ >= 0x12000000UL
 vwscanf
 vwscanf_s
 #  endif
-wcrtomb
+__MINGW_DLSYM(wcrtomb)
 wcrtomb_s
 # endif
 #endif
 wcscat
 #if __MSVCRT_VERSION__ >= 0x0600UL
@@ -3603,11 +3603,11 @@ wcsnlen
 #endif
 wcspbrk
 wcsrchr
 #if __MSVCRT_VERSION__ >= 0x0600UL
 # if __MSVCRT_VERSION__ < 0x07000000UL || __MSVCRT_VERSION__ >= 0x08000000UL
-wcsrtombs
+__MINGW_DLSYM(wcsrtombs)
 wcsrtombs_s
 # endif
 #endif
 wcsspn
 wcsstr