charSetTranslator.cxx

Go to the documentation of this file.
00001 #include <cxxtls/charSetTranslator.h>
00002 
00003 namespace cxxtls
00004 {
00005     std::string CharSetTranslator::translate(std::string const &s) const
00006     {
00007         std::string rv;
00008 
00009         if(!ok_)
00010           return std::string();
00011 
00012         std::string::const_iterator first = s.begin(),
00013                                     last  = s.end();
00014         while(first != last)
00015         {
00016             rv += this->operator[](*first++);
00017         }
00018 
00019         return rv;
00020         
00021     }
00022 
00023     void CharSetTranslator::populateLhs(char const *inset, LhsRange &lhs)
00024     {
00025         int rhsOffset=0;   // assuming characters are arrange in ascending order in the lhs string
00026                            // and that the character position on the lhs, tells you the character
00027                            // position in the rhs to establish the mapping
00028 
00029         while(*inset)
00030         {
00031            unsigned char c = *inset++;
00032 
00033            if(*inset == '-')
00034            {
00035               // this is a range not a single character
00036 
00037               char const *next = inset; ++next;
00038 
00039               unsigned char end = *next;
00040 
00041               if(end != 0)
00042               {
00043                  // there is an end to the range
00044 
00045                  if(end < c)
00046                  {
00047                      ok_ = false;
00048                      error_ = "lhs: end of range smaller than beginning";
00049                      break;
00050                  }
00051 
00052                  while(c <= end)
00053                  {
00054                      lhs.push_back(LhsSubrange(c++, rhsOffset++));
00055                  }
00056 
00057                  inset += 2;
00058 
00059               }
00060               else
00061               {
00062                  ok_    = false;
00063                  error_ = "lhs: missing end of range, '-' is last character of string";
00064                  break;
00065               }
00066            }
00067            else
00068            {
00069               // this is a single character range
00070 
00071               lhs.push_back(LhsSubrange(c, rhsOffset++));
00072 
00073            }
00074 
00075         }
00076 
00077     }
00078 
00079     void CharSetTranslator::populateRhs(char const *set, char *rhs)
00080     {
00081         if(*set == 0)
00082         {
00083            ok_     = false;
00084            error_  = "rhs:  empty string now allowed";
00085            return;
00086         }
00087 
00088         char *end = rhs+256;
00089 
00090         unsigned char c = *set++;
00091 
00092         while(c != 0 && rhs != end)
00093         {
00094             if(*set == 0)
00095             {
00096                // when you reach the end of specified input data, use the
00097                // final character to populate the remainder of the translation
00098                // table.
00099 
00100                while(rhs != end)
00101                    *rhs++ = c;
00102 
00103                return;
00104             }
00105 
00106             if(*set == '-')
00107             {
00108                  char const *next =  set; ++next;
00109 
00110                  if(*next == 0)
00111                  {
00112                      ok_    = false;
00113                      error_ = "rhs:  missing end of range, '-' is last char";
00114                      return;
00115                  }
00116 
00117                  unsigned char g = *next;
00118 
00119                  if(c > g)
00120                  {
00121                      ok_ = false;
00122                      error_ = "rhs:  end of range less than beginning";
00123                      return;
00124                  }
00125 
00126                  while(    (c <= g) 
00127                        && (rhs != end)
00128                       )
00129                  {
00130                      *rhs++ = c++; 
00131                  }
00132 
00133                  set += 2;
00134 
00135             }
00136             else
00137             {
00138                // this is a single character sub-range
00139 
00140                *rhs++ = c;
00141             }
00142            
00143             c = *set++;
00144         }
00145 
00146     }
00147 
00148     void CharSetTranslator::populateMapping(LhsRange const &lhs, char const *rhs)
00149     {
00150 
00151         // populate the mapping table with a default mapping ( 1:1 )
00152 
00153         char *scan = mapping_;
00154         size_t   count= 0;
00155 
00156         while(count < sizeof(mapping_))
00157         {
00158            *scan++ = count++;
00159         }
00160 
00161         // find mapped characters and change from the default
00162         // mapping to the user specified mapping.
00163 
00164 
00165         LhsRange::const_iterator first = lhs.begin(),
00166                                  last  = lhs.end();
00167 
00168          while(first != last)
00169          {
00170              mapping_[ first->offset_ ] = rhs[ first->mappedOffset_ ];
00171              ++first;
00172          }
00173 
00174     }
00175 
00176     std::string CharSetTranslator::unEscape(std::string const &in)
00177     {
00178         std::string rv;
00179 
00180         char const *scan = in.data();
00181         char const * end = in.size() + scan;
00182 
00183         while(scan != end)
00184         {
00185            char c = *scan++;
00186 
00187            if(c == '\\')
00188            {
00189              if(scan == end)
00190                rv += c;
00191              else
00192              {
00193                  c = *scan++;
00194 
00195                  switch(c)
00196                  {
00197                      case 'n':  c = '\n'; break;
00198                      case 'r':  c = '\r'; break;
00199                      case 't':  c = '\t'; break;
00200                      case 's':  c = ' '; break;
00201                      case 'b':  c = '\b'; break;
00202                      case 'a':  c = '\a'; break;
00203                      case 'f':  c = '\f'; break;
00204 
00205                      case '0':
00206                      case 'o':
00207                        {
00208                            // handle octal numbers of the form:  \o123
00209                            //                                      ^
00210                            //                                      scan is here
00211 
00212                            if(   (end - scan >= 3)
00213                               && (scan[0] >= '0' && scan[0] <= '7')
00214                               && (scan[1] >= '0' && scan[1] <= '7')
00215                               && (scan[2] >= '0' && scan[2] <= '7')
00216                               )
00217                            {
00218                               c =  (scan[0]-'0') * 64;
00219                               c += (scan[1]-'0') * 8;
00220                               c += (scan[2]-'0');
00221 
00222                               scan+= 3;
00223                            }
00224 
00225                        }
00226                        break;
00227 
00228                      default:
00229                        break; // just eat the \ and keep the character
00230                               // that follows
00231                  }
00232 
00233                  rv += c;
00234              }
00235 
00236 
00237            }
00238            else
00239              rv += c;
00240         }
00241 
00242         return rv;
00243 
00244     }
00245 
00246 
00247 
00248 };
Generated on Wed Feb 29 22:50:04 2012 for CXXUtilities by  doxygen 1.6.3