00001
00002
00003
00004
00005
00006
00007 #ifndef _MIMETIC_TOKENIZER_H_
00008 #define _MIMETIC_TOKENIZER_H_
00009 #include <iterator>
00010 #include <algorithm>
00011 #include <set>
00012 #include <string>
00013 #include <cstring>
00014
00015 namespace mimetic
00016 {
00017
/// Predicate that reports whether a value belongs to a configurable set of
/// delimiters.
///
/// Generic version: the delimiters are kept in a std::set, so value_type
/// must be usable as a std::set key.
template<typename value_type>
struct IsDelim
{
    // These nested types used to come from std::unary_function, which was
    // deprecated in C++11 and removed in C++17. They are declared directly
    // so code relying on them keeps working.
    typedef value_type argument_type;
    typedef bool result_type;

    /// Returns true when val is one of the registered delimiters.
    bool operator()(const value_type& val) const
    {
        return m_delims.count(val) != 0;
    }

    /// Registers every element of cont as a delimiter.
    /// Delimiters registered earlier are kept.
    template<typename Container>
    void setDelimList(const Container& cont)
    {
        typename Container::const_iterator first = cont.begin();
        typename Container::const_iterator last = cont.end();
        setDelimList(first, last);
    }

    /// Registers every element of [bit, eit) as a delimiter.
    /// Delimiters registered earlier are kept.
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        for(; bit != eit; ++bit)
            m_delims.insert(*bit);
    }

    /// Registers a single delimiter.
    void addDelim(const value_type& value)
    {
        m_delims.insert(value);
    }

    /// Unregisters a single delimiter; does nothing if it was not registered.
    void removeDelim(const value_type& value)
    {
        m_delims.erase(value);
    }
private:
    std::set<value_type> m_delims;
};

/// Specialization for char that uses a 256-entry lookup table instead of a
/// std::set.
///
/// Unlike the generic version, setDelimList() here REPLACES the current
/// delimiters instead of adding to them.
template<>
struct IsDelim<char>
{
    typedef char argument_type;
    typedef bool result_type;

    /// Starts with no delimiters, so the predicate is safe to call even if
    /// setDelimList() is never invoked.
    IsDelim()
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
    }

    /// Replaces the delimiter set with the characters of delims.
    void setDelimList(const std::string& delims)
    {
        setDelimList(delims.begin(), delims.end());
    }

    /// Replaces the delimiter set with the elements of [bit, eit).
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
        for(; bit != eit; ++bit)
            m_lookup[tableIndex(*bit)] = 1;
    }

    /// Registers a single delimiter, keeping the existing ones.
    void addDelim(char value)
    {
        m_lookup[tableIndex(value)] = 1;
    }

    /// Unregisters a single delimiter; does nothing if it was not registered.
    void removeDelim(char value)
    {
        m_lookup[tableIndex(value)] = 0;
    }

    /// Returns true when val is one of the registered delimiters.
    bool operator()(unsigned char val) const
    {
        return m_lookup[val] != 0;
    }
private:
    // Where char is signed, bytes >= 0x80 are negative; indexing the table
    // with them directly would write before the start of the array. Going
    // through unsigned char maps every value into 0..255, matching the
    // parameter type of operator().
    template<typename T>
    static unsigned char tableIndex(const T& value)
    {
        return static_cast<unsigned char>(value);
    }

    char m_lookup[256];
};
00072
00073
00074
00075 template<class Iterator,typename value_type>
00076 class ItTokenizer
00077 {
00078 public:
00079 ItTokenizer(Iterator bit, Iterator eit)
00080 : m_bit(bit), m_eit(eit), m_tok_eit(bit)
00081 {
00082 }
00083 void setSource(Iterator bit, Iterator eit)
00084 {
00085 m_bit = bit;
00086 m_eit = eit;
00087 m_tok_eit = bit;
00088 }
00089 template<typename DelimCont>
00090 void setDelimList(const DelimCont& cont)
00091 {
00092 m_delimPred.setDelimList(cont);
00093 }
00094 template<typename It>
00095 void setDelimList(It bit, It eit)
00096 {
00097 m_delimPred.setDelimList(bit, eit);
00098 }
00099 template<typename DestCont>
00100 bool next(DestCont& dst)
00101 {
00102 dst.erase(dst.begin(), dst.end());
00103 if(m_tok_eit == m_eit)
00104 return false;
00105 m_tok_eit = std::find_if(m_bit, m_eit, m_delimPred);
00106 m_matched = 0;
00107 if(m_tok_eit != m_eit)
00108 m_matched = *m_tok_eit;
00109 std::copy(m_bit, m_tok_eit, std::back_inserter<DestCont>(dst));
00110 m_bit = (m_tok_eit != m_eit && ++m_tok_eit != m_eit ? m_tok_eit :m_eit);
00111 return true;
00112 }
00113 const value_type& matched() const
00114 {
00115 return m_matched;
00116 }
00117 void addDelim(const value_type& value)
00118 {
00119 m_delimPred.addDelim(value);
00120 }
00121 void removeDelim(const value_type& value)
00122 {
00123 m_delimPred.removeDelim(value);
00124 }
00125 private:
00126 Iterator m_bit, m_eit, m_tok_eit;
00127 IsDelim<value_type> m_delimPred;
00128 value_type m_matched;
00129 };
00130
00131
00132
00133 template<typename Container>
00134 struct ContTokenizer: public ItTokenizer<typename Container::const_iterator,typename Container::value_type>
00135 {
00136 typedef typename Container::value_type value_type;
00137 typedef typename Container::iterator iterator;
00138 typedef typename Container::const_iterator const_iterator;
00139
00140
00141
00142 ContTokenizer(const Container* cont)
00143 : ItTokenizer<const_iterator, value_type>(cont->begin(), cont->end())
00144 {
00145 }
00146 template<typename DelimCont>
00147 ContTokenizer(const Container* cont, const DelimCont& delims)
00148 : ItTokenizer<const_iterator,value_type>(cont->begin(), cont->end())
00149 {
00150 this->setDelimList(delims);
00151 }
00152 void setSource(const Container* cont)
00153 {
00154 ItTokenizer<const_iterator,value_type>::setSource(cont->begin(), cont->end());
00155 }
00156 private:
00157 ContTokenizer(const ContTokenizer&);
00158 ContTokenizer& operator=(const ContTokenizer&);
00159 };
00160
00161
00162 typedef ContTokenizer<std::string> StringTokenizer;
00163
00164 }
00165
00166 #endif
00167