Standard C++ Library
Copyright 1998, Rogue Wave Software, Inc.
NAME
codecvt
- A code conversion facet.
SYNOPSIS
#include <locale>
class codecvt_base;
template <class internT, class externT, class stateT>
class codecvt;
DESCRIPTION
The codecvt<internT,externT,stateT> template has code
conversion facilities. Default implementations of
codecvt<char,wchar_t,mbstate_t> and
codecvt<wchar_t,char,mbstate_t> use ctype<wchar_t>::widen
and ctype<wchar_t>::narrow respectively. The default
implementation of codecvt<wchar_t,wchar_t,mbstate_t> simply
uses memcpy (no particular conversion applied).
INTERFACE
// Base class supplying the conversion status codes used by codecvt.
class codecvt_base
{
public:
    // Status reported by the conversion functions.
    enum result
    {
        ok,       // completed the conversion
        partial,  // not all source characters converted
        error,    // encountered a source character it could not convert
        noconv    // no conversion was needed
    };
};
// Code conversion facet.
//
// internT is the internal character type, externT the external character
// type, and stateT the type used to keep track of conversion state.
// Every public member xxx simply calls the corresponding protected
// virtual member do_xxx, which performs the actual work.
template <class internT, class externT, class stateT>
class codecvt : public locale::facet, public codecvt_base {
public:
    typedef internT intern_type;
    typedef externT extern_type;
    typedef stateT state_type;

    // Construct the facet. With the default argument (0) destruction is
    // delegated to the locale(s) containing the facet; with 1 the object
    // must be deleted explicitly.
    explicit codecvt(size_t = 0);

    // Convert internal characters to external characters (calls do_out).
    result out(stateT&, const internT*,
               const internT*, const internT*&,
               externT*, externT*, externT*&) const;

    // Store the sequence that returns the state to the initial
    // (unshifted) state (calls do_unshift).
    result unshift(stateT&, externT*, externT*, externT*&)
        const;

    // Convert external characters to internal characters (calls do_in).
    result in(stateT&, const externT*,
              const externT*, const externT*&,
              internT*, internT*, internT*&) const;

    // Describe the width of the external encoding (calls do_encoding).
    int encoding() const throw();

    // True if no conversion is required (calls do_always_noconv).
    bool always_noconv() const throw();

    // Number of external characters that would be consumed to produce
    // at most the given number of internal characters (calls do_length).
    int length(const stateT&, const externT*, const externT*,
               size_t) const;

    // Largest value do_length can return when its last argument is 1
    // (calls do_max_length).
    int max_length() const throw();

    // Unique identifier for this type of facet.
    static locale::id id;

protected:
    ~codecvt(); // virtual

    virtual result do_out(stateT&,
                          const internT*,
                          const internT*,
                          const internT*&,
                          externT*, externT*,
                          externT*&) const;

    virtual result do_in(stateT&,
                         const externT*,
                         const externT*,
                         const externT*&,
                         internT*, internT*,
                         internT*&) const;

    virtual result do_unshift(stateT&,
                              externT*, externT*,
                              externT*&) const;

    virtual int do_encoding() const throw();

    virtual bool do_always_noconv() const throw();

    // Operates on the external sequence, matching the public length()
    // member that forwards to it.
    virtual int do_length(const stateT&, const externT*,
                          const externT*,
                          size_t) const;

    virtual int do_max_length() const throw();
};
TYPES
intern_type
Type of character to convert from.
extern_type
Type of character to convert to.
state_type
Type to keep track of state and determine the codeset(s)
to be converted.
CONSTRUCTORS
explicit codecvt(size_t refs = 0)
Construct a codecvt facet. If the refs argument is 0
(default), then destruction of the object is delegated to
the locale, or locales, containing it. This allows the
user to ignore lifetime management issues. On the other
hand, if refs is 1, then the object must be explicitly
deleted; the locale does not do so.
DESTRUCTORS
~codecvt(); // virtual and protected
Destroy the facet.
FACET ID
static locale::id id;
Unique identifier for this type of facet.
PUBLIC MEMBER FUNCTIONS
The public members of the codecvt facet include an interface
to protected members. Each public member xxx has a
corresponding virtual protected member do_xxx. All work is
delegated to these protected members. For instance, the
public length function simply calls its protected cousin
do_length.
bool
always_noconv() const
throw();
int
encoding() const
throw();
result
in(stateT& state, const externT* from,
const externT* from_end, const externT*& from_next,
internT* to, internT* to_limit, internT*& to_next) const;
int
length(const stateT& state, const externT* from,
const externT* end,
size_t max) const;
int
max_length() const
throw();
result
out(stateT& state, const internT* from,
const internT* from_end, const internT*& from_next,
externT* to, externT* to_limit, externT*& to_next)
const;
result
unshift(stateT& state, externT* to, externT* to_limit,
externT*& to_next) const;
Each of these public member functions xxx simply calls
the corresponding protected do_xxx function.
PROTECTED MEMBER FUNCTIONS
virtual bool
do_always_noconv() const
throw();
Returns true if no conversion is required. This is the
case if do_in and do_out return noconv for all valid
arguments. The instantiation codecvt<char,char,mbstate_t>
returns true, while all other default instantiations
return false.
virtual int
do_encoding() const
throw();
Returns one of the following
- -1 if the encoding on the external character
sequence is dependent on state.
- A constant number representing the number of
external characters per internal character in a
fixed width encoding.
- 0 if the encoding uses a variable width.
virtual result
do_in(stateT& state,
const externT* from,
const externT* from_end,
const externT*& from_next,
internT* to, internT* to_limit,
internT*& to_next) const;
virtual result
do_out(stateT& state,
const internT* from,
const internT* from_end,
const internT*& from_next,
externT* to, externT* to_limit,
externT*& to_next) const;
Both functions take characters in the range of
[from,from_end), apply an appropriate conversion, and
place the resulting characters in the buffer starting at
to. Each function converts at most from_end-from source
characters, and stores no more than to_limit-to destination
characters (for do_out the source type is internT and the
destination type is externT; for do_in it is the reverse).
Both do_out and do_in stop if they find a
character they cannot convert. In any case, from_next and
to_next are always left pointing to the next character
beyond the last one successfully converted.
do_out and do_in must be called under the following pre-
conditions:
- from <= from_end
- to <= to_limit
- state is either initialized to the beginning of
a sequence or equal to the result of the previ-
ous conversion on the sequence.
In the case where no conversion is required, from_next is
set to from and to_next set to to.
do_out and do_in return one of the following:
RETURN VALUE MEANING
ok completed the conversion
partial not all source characters converted
error encountered a source character
it could not convert
noconv no conversion was needed
If either function returns partial and (from ==
from_end), then one of two conditions prevail:
- The destination sequence has not accepted all
the converted characters, or
- Additional source characters are needed before
another destination element can be
assembled.
virtual int
do_length(const stateT&, const externT* from,
const externT* end,
size_t max) const;
Determines the largest number <= max of internT charac-
ters that can be produced from the sequence [from,end),
and returns the number of externT characters that would
be consumed from [from,end) in order to produce this
number of internT characters.
do_length must be called under the following pre-
conditions:
- from <= end
- state is either initialized to the beginning of
a sequence or equal to the result of the previ-
ous conversion on the sequence.
virtual int
do_max_length() const throw();
Returns the maximum value that do_length can return for
any valid combination of its first three arguments, with
the fourth argument (max) set to 1.
virtual result
do_out(stateT& state,
const internT* from,
const internT* from_end,
const internT*& from_next,
externT* to, externT* to_limit,
externT*& to_next) const;
See do_in above.
virtual result
do_unshift(stateT& state, externT* to, externT* to_limit,
externT*& to_next) const;
Determines the sequence of externT characters that should
be appended to a sequence whose state is given by state,
in order to terminate the sequence (that is, to return it
to the default or initial or unshifted state). Stores the
terminating sequence starting at to, proceeding no
farther than to_limit. Sets to_next to point past the last
externT character stored.
do_unshift must be called under the following pre-
conditions:
- to <= to_limit
- state is either initialized to the beginning of
a sequence or equal to the result of the previ-
ous conversion on the sequence.
The return value from do_unshift is as follows:
RETURN VALUE MEANING
ok terminating sequence was
stored successfully
partial only part of the sequence
was stored
error the state is invalid
noconv no terminating sequence is
needed for this state
EXAMPLE
//
// codecvt.cpp
//
#include <sstream>
#include "codecvte.h"
// Demonstrates a user-defined codecvt facet: converts a buffer with in(),
// then converts the result back with out(), printing the buffers before
// and after each step.
int main ()
{
    using namespace std;

    // The conversion functions require a state that is initialized to
    // the beginning of a sequence, so value-initialize it rather than
    // leaving it indeterminate.
    mbstate_t state = mbstate_t();

    // A string of ISO characters and buffers to hold
    // conversions
    string ins("\xfc \xcc \xcd \x61 \xe1 \xd9 \xc6 \xf5");
    string ins2(ins.size(),'.');
    string outs(ins.size(),'.');

    // Print initial contents of buffers
    cout << "Before:\n" << ins << endl;
    cout << ins2 << endl;
    cout << outs << endl << endl;

    // Initialize buffers
    string::iterator in_it = ins.begin();
    string::iterator out_it = outs.begin();

    // Create a user defined codecvt facet
    // This facet converts from ISO Latin
    // Alphabet No. 1 (ISO 8859-1) to
    // U.S. ASCII code page 437
    // This facet replaces the default for
    // codecvt<char,char,mbstate_t>
    locale loc(locale(),new ex_codecvt);

    // Now get the facet from the locale
    const codecvt<char,char,mbstate_t>& cdcvt =
#ifndef _RWSTD_NO_TEMPLATE_ON_RETURN_TYPE
        use_facet<codecvt<char,char,mbstate_t> >(loc);
#else
        use_facet(loc,(codecvt<char,char,mbstate_t>*)0);
#endif

    // convert the buffer
    // NOTE(review): string iterators are passed where the codecvt
    // interface declares pointer parameters; this presumably relies on
    // string::iterator being a plain pointer in this library - confirm.
    cdcvt.in(state,ins.begin(),ins.end(),in_it,
             outs.begin(),outs.end(),out_it);
    cout << "After in:\n" << ins << endl;
    cout << ins2 << endl;
    cout << outs << endl << endl;

    // Lastly, convert back to the original codeset
    in_it = ins.begin();
    out_it = outs.begin();
    cdcvt.out(state, outs.begin(),outs.end(),out_it,
              ins2.begin(),ins2.end(),in_it);
    cout << "After out:\n" << ins << endl;
    cout << ins2 << endl;
    cout << outs << endl;

    return 0;
}
SEE ALSO
locale, facets, codecvt_byname