strtool.h

Go to the documentation of this file.
00001 #ifndef str_h_included
00002 #define str_h_included
00003 
00004 
00005 //
00006 // Copyright 2002, Lowell Boggs Jr.
00007 //
00008 // This file or directory, containing source code for a computer program,
00009 // is Copyrighted by Lowell Boggs, Jr.  987 Regency Drive, Lewisville
00010 // TX (USA), 75067.  You may use, copy, modify, and distribute this
00011 // source file without charge or obligation so long as you agree to
00012 // the following:
00013 //
00014 //  1.  You must indemnify Lowell Boggs against any and all financial
00015 //      obligations caused by its use, misuse, function, or malfunction.
00016 //      Further, you acknowledge that there is no warranty of any kind,
00017 //      whatsoever.
00018 //
00019 //  2.  You agree not to attempt to patent any portion of this original
00020 //      work -- though you may attempt to patent your own extensions to
00021 //      it if you so choose.
00022 //
00023 //  3.  You keep this copyright notice with the file and all copies
00024 //      of the file and do not change it anyway except language translation.
00025 //
00026 // You are responsible for enforcing your own compliance with these
00027 // conditions and may not use this source file if you cannot agree to the
00028 // above terms and conditions.
00029 
00030 
00031 
00032 
00037 //
00038 
00039 #include <list>
00040 #include <string>
00041 #include <string.h>
00042 #include <algorithm>
00043 #include <vector>
00044 #include <ctype.h>
00045 
00046 #ifdef _MSC_VER
00047 #define STRNCASECMP strnicmp
00048 #else
00049 #define STRNCASECMP strncasecmp
00050 #endif
00051 
00052 
00053 namespace cxxtls
00054 {
00055 
00056 class StrTool
00057     //
00062 {
00063 public:
00064 
00065   typedef std::string        string;         // shorthand used by the member signatures below
00066   typedef std::list<string>  stringlist_t;   // output list filled by parse_words()/parse_lines()
00067 
00068   enum constants
00069   {
00070      all=2000000000    // at_most value passed by the overloads that take no explicit limit
00071   };
00072 
00073   struct Is_Space
00074     //
00099     //
00100   {
00101     bool operator() (char c) const
00102     {
00103       return c == ' '  ||  //  perform the comparison
00104              c == '\t' ||
00105              c == '\n' ||
00106              c == '\r' ;
00107     }
00108 
00109   };
00110 
00111   struct Is_Newline
00112     //
00115     //
00116   {
00117     bool operator() (char c) const
00118     {
00119       return c == '\n';
00120     }
00121   };
00122 
00123   struct Is_Colon
00124   {
00125     //
00128     //
00129     bool operator() (char c) const { return c == ':'; }
00130   };
00131 
00132   struct Is_Comma
00133   {
00134     //
00137     //
00138     bool operator() (char c) const { return c == ','; }
00139   };
00140 
00141   struct Is_Tab
00142   {
00143     //
00146     //
00147     bool operator() (char c) const { return c == ','; }
00148   };
00149 
00150   struct Is_Delim
00151   {
00152     //
00155     //
00156     char delim_;
00157 
00158     Is_Delim(char c): delim_(c) {}
00159 
00160     bool operator() (char c) const { return c == delim_; }
00161   };
00162 
00163     static 
00164     size_t 
00165     splitOnString(std::string const &s, std::vector<std::string> &output, std::string const &sep)
00166        // return the count of items added to the output
00167     {
00168         size_t count = 0;
00169 
00170         size_t pos=0;
00171 
00172         size_t size = s.size();
00173 
00174         size_t sepSize = sep.size();
00175 
00176         for(;;)
00177         {
00178            size_t newPos = s.find(sep, pos);
00179 
00180            if(newPos <= size)
00181            {
00182               // found a new one!
00183 
00184               output.push_back("");
00185               output.back() = s.substr(pos, newPos - pos);
00186 
00187               pos = newPos + sepSize;
00188 
00189               ++count;
00190 
00191            }
00192            else
00193            {
00194              output.push_back( s.c_str() + pos );
00195 
00196              ++count;
00197 
00198              break;
00199            }
00200         }
00201 
00202 
00203         return count;
00204 
00205     }
00206 
00207 
00208   template<class IsSeparator>
00209   static
00210   int  parse_words(string const       &s,
00211                    stringlist_t       *l,
00212                    unsigned            at_most,
00213                    IsSeparator const & is_space
00214                   )
00242   {
00243     string::const_iterator cur = s.begin();
00244     string::const_iterator end = s.end();
00245 
00246     int count=0;
00247 
00248     while( at_most != 0  &&
00249            cur != end
00250          )
00251     {
00252       // parse a word and stick it in the list -- remove leading and
00253       // trailing spaces unless this is the 'last word' in which case
00254       // we remove leading but not trailing characters.
00255 
00256       while(cur != end && is_space(*cur) ) ++cur;
00257 
00258       if(cur == end )
00259           return count;
00260 
00261       if(at_most == 1)           // last string gets all of text
00262       {
00263          string word(cur,end);
00264 
00265          l->push_back(word);
00266 
00267          ++count;
00268 
00269          return count;
00270       }
00271 
00272       // strings other than the last have the trailing spaces removed
00273       // before insertion into the list
00274 
00275       string word;
00276 
00277       char c;
00278 
00279       while(cur != end && !is_space( c = *cur) )
00280       {
00281         word += c;
00282         ++cur;
00283       }
00284 
00285       l->push_back(word);
00286 
00287       ++count;
00288 
00289       --at_most;
00290     }
00291 
00292     return count;
00293   }
00294 
00295 
00296                         
00297   static int  parse_words(string const  &s,  stringlist_t  *l )
00302   {
00303     return parse_words(s, l, all, Is_Space() );
00304   }
00305 
00306   static int  parse_words(string const  &s,  stringlist_t  *l, unsigned at_most )
00311   {
00312     return parse_words(s, l, at_most, Is_Space() );
00313   }
00314 
00315   static int  parse_lines(string const  &s,  stringlist_t  *l )
00316     //
00318     //
00319   {
00320     return parse_words(s, l, all, Is_Newline() );
00321   }
00322 
00323   static int  parse_lines(string const  &s,  stringlist_t  *l, unsigned at_most )
00324     //
00327     //
00328   {
00329     return parse_words(s, l, at_most, Is_Newline() );
00330   }
00331 
00332 
00333   static string expand_tabs(char const *sourceBytes,
00334                             bool* non_printable_chars = 0,
00335                             char  replacement = 0
00336                            );
00357 
00358 
00359   static string expand_tabs(string const &r,
00360                             bool *non_printable_chars=0,
00361                             char replacement=0
00362                            )
00366   {
00367     return expand_tabs(r.c_str(), non_printable_chars, replacement);
00368   }
00369 
00370   static string pack_tabs(char const *s, bool stop_after_first_non_blank=true);
00380 
00381   static string pack_tabs(string const &r, bool stop_after_first_non_blank=true)
00390   {
00391     return pack_tabs(r.c_str());
00392   }
00393 
00394   template<class Char, class Iterator>
00395   static
00396   bool
00397   is_word_char(Char c, Iterator word_start, Iterator word_end)
00402   {
00403     return std::find(word_start, word_end, c) != word_end;
00404   }
00405 
00406 
00407   template<class It1, class It2>
00408   static
00409   It1
00410   find_next_word(It1 start, It1 end, It2 word_start, It2  word_end)
00425   {
00426 
00427     // skip past word characters under the cursor
00428 
00429     while(start != end)
00430     {
00431        if(!is_word_char(*start, word_start, word_end))
00432        {
00433          break;
00434        }
00435 
00436        ++start;
00437 
00438     }
00439 
00440     // scan to the beginning of the next word
00441 
00442     return std::find_first_of(start, end, word_start, word_end);
00443 
00444   }
00445 
00446 
00447   template<class It1, class It2>
00448   static
00449   It1
00450   find_prev_word(It1 start, It1 begin, It1 end, It2 word_start, It2 word_end)
    //
    // Scan backward from 'start' through the text [begin, end) and return
    // the position where a word begins.  A word character is any member of
    // the set [word_start, word_end), as decided by is_word_char().
    //
    //  * If 'start' sits inside a word (the character before it is also a
    //    word character) the beginning of that same word is returned.
    //  * If 'start' sits on the first character of a word, or on a non-word
    //    character, the scan continues back to the beginning of the
    //    preceding word.
    //  * 'begin' is returned whenever the scan reaches it.
    //
    // 'start' may equal 'end'; it is then stepped back one position before
    // anything is dereferenced.
    //
    // NOTE(review): when the scan runs into 'begin', 'begin' is returned
    // without testing whether *begin is itself a word character, so text
    // that starts with a non-word character yields 'begin' rather than the
    // first word character after it -- confirm this is the intended
    // contract.
    //
00456   {
00457 
    // nothing lies before 'begin'
00458     if(start == begin)
00459       return start;
00460 
    // 'end' is not dereferenceable -- step onto the last character
00461     if(start == end)
00462       --start;
00463 
00464     if(start == begin)
00465       return start;
00466 
00467     if( is_word_char(*start, word_start, word_end) )
00468     {
00469       // find beginning of current word -- unless we are
00470       // already at the beginning, then find the beginning of the
00471       // previous word
00472 
00473       --start;
00474 
00475       if(start == begin)
00476         return start;
00477 
00478       if(is_word_char(*start, word_start, word_end))
00479       {
00480         // find beginning of this word
00481 
00482         --start;
00483 
00484         while(start != begin)
00485         {
00486           if( !is_word_char(*start, word_start, word_end) )
00487           {
            // stepped one past the word's first character -- move back onto it
00488             ++start;
00489             break;
00490           }
00491         
00492           --start;
00493         }
00494 
00495         // we are either at the beginning of the word or the
00496         // beginning of the string
00497 
00498         return start;
00499 
00500       }
00501       else
00502       {
00503         // drop through and find beginning of previous
00504         // word.
00505       }
00506 
00507     }
00508 
00509     // we are not in a word character -- scan back to the
00510     // beginning of the previous word.
00511 
    // first pass: back up over non-word characters until a word character
    // (the tail of the previous word) or 'begin' is reached
00512     while(start != begin)
00513     {
00514       if(is_word_char(*start, word_start, word_end))
00515       {
00516         break;
00517       }
00518       --start;
00519     }
00520 
00521     if(start == begin)
00522       return start;
00523 
    // second pass: back up over that word's characters; its first character
    // is one past the first non-word character met
00524     while(start != begin)
00525     {
00526       if(!is_word_char(*start, word_start, word_end))
00527       {
00528         ++start;
00529         return start;
00530       }
00531       --start;
00532     }
00533 
00534     return start;
00535 
00536   }
00537 
00538   static void remove_leading(std::string &s, char space=' ');
00540 
00541   static int compareInsensitive(std::string const &a, std::string const &b)
00542   {
00543      size_t asize = a.size();
00544      size_t bsize = b.size();
00545 
00546      size_t smallerSize = asize < bsize ? asize : bsize;
00547 
00548      int rc = STRNCASECMP(a.data(), b.data(), smallerSize);
00549 
00550      if(rc != 0)
00551      {
00552         return rc;
00553      }
00554 
00555      // if you get here, a and b are equal for some leading number of characters
00556      // so define the shorter of the two total strins as being less
00557 
00558      if(asize < bsize)
00559         return -1;
00560      else
00561      if(asize > bsize)
00562         return 1;
00563 
00564      // a and b are the same length and they are of equal when we get here
00565 
00566 
00567 
00568      return 0;
00569 
00570      
00571   }
00572 
00573 
00574    static bool snatch_file_info(std::string const &s, std::string &file, int &line);
00605 
00606 
00607   struct CharCompare
00623   {
00624     bool insensitive_;
00625 
00626     bool operator() (char a, char b) const
00627     {
00628         if(insensitive_)
00629         {
00630           if( isupper(a) )
00631             a = tolower(a);
00632         
00633           if( isupper(b) )
00634             b = tolower(b);
00635         
00636           return a == b;
00637         }
00638         
00639         return a == b;
00640     }
00641 
00642     CharCompare(bool caseInsensitive)
00643     : insensitive_(caseInsensitive)
00644     {
00645     }
00646 
00647   };
00648 
00649   static size_t find(char const *& fragment, size_t fragLen, char const *&within, size_t totalLen, bool insensitive)
00663   {
00664       
00665       char const *where = std::search( within, within+totalLen, fragment, fragment+fragLen, CharCompare(insensitive));
00666 
00667       return where - within;
00668 
00669   }
00670 
00671   static size_t find(std::string const &fragment, std::string const &within, bool insensitive)
00672   {
00673      char const *frag = fragment.data();
00674      char const *with = within.data();
00675 
00676      return find(frag, fragment.size(), with, within.length(), insensitive);
00677   }
00678 
00679   static size_t find(char const *fragment, std::string const &within, bool insensitive)
00680   {
00681      char const *with = within.data();
00682      return find(fragment, strlen(fragment), with, within.length(), insensitive);
00683   }
00684 
00685 
00686 
00687 
00688 
00689 };  // class StrTool
00690 
00691 } // namespace cxxtls
00692 
00693 #endif
Generated on Wed Feb 29 22:50:04 2012 for CXXUtilities by  doxygen 1.6.3