libyui-ncurses  2.55.0
NCWordWrapper.cc
1 /*
2  Copyright (C) 2020 SUSE LLC
3 
4  This library is free software; you can redistribute it and/or modify
5  it under the terms of the GNU Lesser General Public License as
6  published by the Free Software Foundation; either version 2.1 of the
7  License, or (at your option) version 3.0 of the License. This library
8  is distributed in the hope that it will be useful, but WITHOUT ANY
9  WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
11  License for more details. You should have received a copy of the GNU
12  Lesser General Public License along with this library; if not, write
13  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
14  Floor, Boston, MA 02110-1301 USA
15 */
16 
17 
18 /*-/
19 
20  File: NCWordWrapper.cc
21 
22  Author: Stefan Hundhammer <shundhammer@suse.de>
23 
24 /-*/
25 
26 
27 #include <cwctype>
28 #include <iostream>
29 #include "NCWordWrapper.h"
30 
31 
32 #define DEFAULT_LINE_WIDTH 78
33 
34 using std::wstring;
35 using std::endl;
36 using std::wcout;
37 
38 
40  _lineWidth( DEFAULT_LINE_WIDTH ),
41  _lines( 0 ),
42  _dirty( false )
43 {
44 
45 }
46 
47 
48 void NCWordWrapper::setText( const wstring & origText )
49 {
50  if ( origText != _origText )
51  {
52  _origText = origText;
53  _dirty = true;
54  }
55 }
56 
57 
58 void NCWordWrapper::setLineWidth( int width )
59 {
60  if ( width != _lineWidth )
61  {
62  _lineWidth = width;
63  _dirty = true;
64  }
65 }
66 
67 
69 {
70  _origText.clear();
71  _wrappedText.clear();
72  _lineWidth = DEFAULT_LINE_WIDTH;
73  _lines = 0;
74  _dirty = false;
75 }
76 
77 
79 {
80  ensureWrapped();
81 
82  return _lines;
83 }
84 
85 
86 const wstring & NCWordWrapper::wrappedText()
87 {
88  ensureWrapped();
89 
90  return _wrappedText;
91 }
92 
93 
95 {
96  if ( _dirty )
97  wrap();
98 
99  _dirty = false;
100 }
101 
102 
103 wstring NCWordWrapper::normalizeWhitespace( const wstring & orig )
104 {
105  wstring normalized;
106  normalized.reserve( orig.size() );
107  bool skippingWhitespace = false;
108 
109  for ( wchar_t c: orig )
110  {
111  switch ( c )
112  {
113  case L' ': // Whitespace
114  case L'\t':
115  case L'\n':
116  case L'\v':
117  case L'\r':
118  case L'\f':
119  // Don't add any whitespace right now: Wait until there is real content.
120 
121  if ( ! normalized.empty() ) // Ignore any leading whitspace
122  skippingWhitespace = true;
123  break;
124 
125  default: // Non-whitespace
126 
127  // Add one blank for any skipped whitespace.
128  //
129  // This will not add trailing whitespace which is exactly the
130  // desired behaviour.
131 
132  if ( skippingWhitespace )
133  normalized += ' ';
134 
135  normalized += c;
136  skippingWhitespace = false;
137  break;
138  }
139  }
140 
141  return normalized;
142 }
143 
144 
146 {
147  wstring unwrapped = normalizeWhitespace( _origText );
148  _wrappedText.clear();
149  _wrappedText.reserve( unwrapped.size() );
150  _lines = 0;
151 
152  while ( ! unwrapped.empty() )
153  {
154  wstring line = nextLine( unwrapped );
155 
156 #ifdef WORD_WRAPPER_TESTER
157  wcout << "Line: \"" << line << "\" length: " << line.size() << endl;
158  wcout << "Rest: \"" << unwrapped << "\"\n" << endl;
159 #endif
160 
161  if ( ! _wrappedText.empty() )
162  _wrappedText += L'\n';
163 
164  _wrappedText += line;
165  _lines++;
166  }
167 
168  _dirty = false;
169 }
170 
171 
172 wstring NCWordWrapper::nextLine( wstring & unwrapped )
173 {
174  wstring line;
175 
176 #ifdef WORD_WRAPPER_TESTER
177  wcout << "nextLine( \"" << unwrapped << "\" )" << endl;
178 #endif
179 
180  if ( (int) unwrapped.size() <= _lineWidth )
181  {
182  // The remaining unwrapped text fits into one line
183 
184  line = unwrapped;
185  unwrapped.clear();
186 
187  return line;
188  }
189 
190 
191  // Try to wrap at the rightmost possible whitespace
192 
193  int pos = _lineWidth; // The whitespace will be removed here
194 
195  while ( pos > 0 && unwrapped[ pos ] != L' ' )
196  --pos;
197 
198  if ( unwrapped[ pos ] == L' ' )
199  {
200  line = unwrapped.substr( 0, pos );
201  unwrapped.erase( 0, pos + 1 );
202 
203  return line;
204  }
205 
206 
207  // Try to wrap at the rightmost possible non-alphanum character
208 
209  pos = _lineWidth - 1; // We'll need to keep the separator character
210 
211  while ( pos > 0 && iswalnum( unwrapped[ pos ] ) )
212  --pos;
213 
214  if ( ! iswalnum( unwrapped[ pos ] ) )
215  {
216 #ifdef WORD_WRAPPER_TESTER
217  wcout << "iswalnum wrap" << endl;
218 #endif
219 
220  line = unwrapped.substr( 0, pos + 1 );
221  unwrapped.erase( 0, pos + 1 );
222 
223  return line;
224  }
225 
226 
227  // Still no chance to break the line? So we'll have to break in mid-word.
228  // This is crude and brutal, but in some locales (Chinese, Japanese,
229  // Korean) there is very little whitespace, so sometimes we have no other
230  // choice.
231 
232 #ifdef WORD_WRAPPER_TESTER
233  wcout << "desperation wrap" << endl;
234 #endif
235 
236  pos = _lineWidth - 1;
237  line = unwrapped.substr( 0, pos + 1 );
238  unwrapped.erase( 0, pos + 1 );
239 
240  return line;
241 }
242 
243 
244 // ----------------------------------------------------------------------
245 
246 
247 // Standalone test frame for this class.
248 //
249 // Build with
250 //
251 // g++ -D WORD_WRAPPER_TESTER -o word-wrapper-tester NCWordWrapper.cc
252 //
253 // Usage:
254 //
255 // ./word-wrapper-tester "text to wrap" <line-length>
256 //
257 // Notice that this does not do any fancy UTF-8 recoding of the command line
258 // arguments, so non-ASCII characters may be slightly broken. This is expected,
259 // and for the sake of simplicity, this will not be fixed. This only affects
260 // this test frame; the tested class can handle UTF-8 characters just fine
261 // (thus "Lörem üpsum" instead of "Lorem ipsum" in the AutoWrap*.cc libyui
262 // examples).
263 
264 #ifdef WORD_WRAPPER_TESTER
265 
266 
267 int main( int argc, char *argv[] )
268 {
269  NCWordWrapper wrapper;
270 
271  if ( argc != 3 )
272  {
273  std::cerr << "\nUsage: " << argv[0] << " \"text to wrap\" <line-length>\n" << endl;
274  exit( 1 );
275  }
276 
277  std::string src( argv[1] );
278  wstring input( src.begin(), src.end() );
279  int lineWidth = atoi( argv[2] );
280 
281  wcout << "Wrapping to " << lineWidth << " columns:\n\"" << input << "\"\n" << endl;
282 
283  wrapper.setText( input );
284  wrapper.setLineWidth( lineWidth );
285  wrapper.wrap();
286 
287  wcout << " 10 20 30 40 50" << endl;
288  wcout << "12345678901234567890123456789012345678901234567890" << endl;
289  wcout << wrapper.wrappedText() << endl;
290  wcout << "-- Wrapped lines: " << wrapper.lines() << endl;
291 }
292 
293 #endif
Helper class to word-wrap text into a specified maximum line width.
Definition: NCWordWrapper.h:39
static std::wstring normalizeWhitespace(const std::wstring &orig)
Return a string where any sequence of whitespace in the original text is replaced with a single blank...
void setLineWidth(int width)
Set the maximum line width to wrap into.
void setText(const std::wstring &origText)
Set the original text to wrap.
int lines()
Return the number of lines after wrapping the original text.
void wrap()
Do the wrapping.
std::wstring nextLine(std::wstring &unwrapped)
Return the next line that fits into the line width and removes it from 'unwrapped'.
NCWordWrapper()
Constructor.
const std::wstring & wrappedText()
Wrap the original text and return the wrapped text.
void ensureWrapped()
Do the wrapping if necessary.
void clear()
Clear the old content.
const std::wstring & origText() const
Return the original unwrapped text.
Definition: NCWordWrapper.h:70