001/*
002 * Copyright 2016-2018 Ping Identity Corporation
003 * All Rights Reserved.
004 */
005/*
006 * Copyright (C) 2016-2018 Ping Identity Corporation
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021package com.unboundid.ldap.sdk.unboundidds.tools;
022
023
024
025import java.io.File;
026import java.io.FileOutputStream;
027import java.io.InputStream;
028import java.io.IOException;
029import java.io.OutputStream;
030import java.util.ArrayList;
031import java.util.Collections;
032import java.util.LinkedHashMap;
033import java.util.LinkedHashSet;
034import java.util.List;
035import java.util.Map;
036import java.util.Set;
037import java.util.TreeMap;
038import java.util.concurrent.atomic.AtomicLong;
039import java.util.zip.GZIPOutputStream;
040
041import com.unboundid.ldap.sdk.Filter;
042import com.unboundid.ldap.sdk.LDAPException;
043import com.unboundid.ldap.sdk.ResultCode;
044import com.unboundid.ldap.sdk.Version;
045import com.unboundid.ldap.sdk.schema.Schema;
046import com.unboundid.ldif.LDIFException;
047import com.unboundid.ldif.LDIFReader;
048import com.unboundid.util.ByteStringBuffer;
049import com.unboundid.util.CommandLineTool;
050import com.unboundid.util.Debug;
051import com.unboundid.util.ObjectPair;
052import com.unboundid.util.PassphraseEncryptedOutputStream;
053import com.unboundid.util.StaticUtils;
054import com.unboundid.util.ThreadSafety;
055import com.unboundid.util.ThreadSafetyLevel;
056import com.unboundid.util.args.ArgumentException;
057import com.unboundid.util.args.ArgumentParser;
058import com.unboundid.util.args.BooleanArgument;
059import com.unboundid.util.args.DNArgument;
060import com.unboundid.util.args.FileArgument;
061import com.unboundid.util.args.FilterArgument;
062import com.unboundid.util.args.IntegerArgument;
063import com.unboundid.util.args.SubCommand;
064import com.unboundid.util.args.StringArgument;
065
066import static com.unboundid.ldap.sdk.unboundidds.tools.ToolMessages.*;
067
068
069
070/**
071 * This class provides a command-line tool that can be used to split an LDIF
072 * file below a specified base DN.  This can be used to help initialize an
073 * entry-balancing deployment for use with the Directory Proxy Server.
074 * <BR>
075 * <BLOCKQUOTE>
076 *   <B>NOTE:</B>  This class, and other classes within the
077 *   {@code com.unboundid.ldap.sdk.unboundidds} package structure, are only
078 *   supported for use against Ping Identity, UnboundID, and Alcatel-Lucent 8661
079 *   server products.  These classes provide support for proprietary
080 *   functionality or for external specifications that are not considered stable
081 *   or mature enough to be guaranteed to work in an interoperable way with
082 *   other types of LDAP servers.
083 * </BLOCKQUOTE>
084 * <BR>
085 * It supports a number of algorithms for determining how to split the data,
086 * including:
087 * <UL>
088 *   <LI>
089 *     split-using-hash-on-rdn -- The tool will compute a digest of the DN
090 *     component that is immediately below the split base DN, and will use a
091 *     modulus to select a backend set for a given entry.  Since the split is
 * based purely on computation involving the DN, there is no need for
093 *     caching to ensure that children are placed in the same sets as their
094 *     parent, which allows it to run effectively with a small memory footprint.
095 *   </LI>
096 *   <LI>
097 *     split-using-hash-on-attribute -- The tool will compute a digest of the
098 *     value(s) of a specified attribute, and will use a modulus to select a
099 *     backend set for a given entry.  This hash will only be computed for
100 *     entries immediately below the split base DN, and a cache will be used to
101 *     ensure that entries more than one level below the split base DN are
102 *     placed in the same backend set as their parent.
103 *   </LI>
104 *   <LI>
105 *     split-using-fewest-entries -- When examining an entry immediately below
106 *     the split base DN, the tool will place that entry in the set that has the
107 *     fewest entries.  For flat DITs in which entries only exist one level
108 *     below the split base DN, this will effectively ensure a round-robin
109 *     distribution.  But for cases in which there are branches of varying sizes
110 *     below the split base DN, this can help ensure that entries are more
111 *     evenly distributed across backend sets.  A cache will be used to ensure
112 *     that entries more than one level below the split base DN are placed in
113 *     the same backend set as their parent.
114 *   </LI>
115 *   <LI>
116 *     split-using-filter -- When examining an entry immediately below the split
 * base DN, a series of filters will be evaluated against that entry, with
 * each filter associated with a specific backend set.  If an entry doesn't
119 *     match any of the provided filters, an RDN hash can be used to select the
120 *     set.  A cache will be used to ensure that entries more than one level
121 *     below the split base DN are placed in the same backend set as their
122 *     parent.
123 *   </LI>
124 * </UL>
125 */
126@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
127public final class SplitLDIF
128     extends CommandLineTool
129{
  /**
   * The maximum length of any message to write to standard output or standard
   * error.  It is one column less than
   * {@code StaticUtils.TERMINAL_WIDTH_COLUMNS} so that wrapped output fits on
   * a single line.
   */
  private static final int MAX_OUTPUT_LINE_LENGTH =
       StaticUtils.TERMINAL_WIDTH_COLUMNS - 1;



  // The global arguments used by this tool.  These are null until they are
  // initialized in addToolArguments.
  private BooleanArgument addEntriesOutsideSplitBaseDNToAllSets = null;
  private BooleanArgument addEntriesOutsideSplitBaseDNToDedicatedSet = null;
  private BooleanArgument compressTarget = null;
  private BooleanArgument encryptTarget = null;
  private BooleanArgument sourceCompressed = null;
  private DNArgument splitBaseDN = null;
  private FileArgument encryptionPassphraseFile = null;
  private FileArgument schemaPath = null;
  private FileArgument sourceLDIF = null;
  private FileArgument targetLDIFBasePath = null;
  private IntegerArgument numThreads = null;

  // The arguments used to split using a hash of the RDN.
  private IntegerArgument splitUsingHashOnRDNNumSets = null;
  private SubCommand splitUsingHashOnRDN = null;

  // The arguments used to split using a hash on a specified attribute.
  private BooleanArgument splitUsingHashOnAttributeAssumeFlatDIT = null;
  private BooleanArgument splitUsingHashOnAttributeUseAllValues = null;
  private IntegerArgument splitUsingHashOnAttributeNumSets = null;
  private StringArgument splitUsingHashOnAttributeAttributeName = null;
  private SubCommand splitUsingHashOnAttribute = null;

  // The arguments used to choose the set with the fewest entries.
  private BooleanArgument splitUsingFewestEntriesAssumeFlatDIT = null;
  private IntegerArgument splitUsingFewestEntriesNumSets = null;
  private SubCommand splitUsingFewestEntries = null;

  // The arguments used to choose the set using a provided set of filters.
  private BooleanArgument splitUsingFilterAssumeFlatDIT = null;
  private FilterArgument splitUsingFilterFilter = null;
  private SubCommand splitUsingFilter = null;
172
173
174
175  /**
176   * Runs the tool with the provided set of command-line arguments.
177   *
178   * @param  args  The command-line arguments provided to this tool.
179   */
180  public static void main(final String... args)
181  {
182    final ResultCode resultCode = main(System.out, System.err, args);
183    if (resultCode != ResultCode.SUCCESS)
184    {
185      System.exit(resultCode.intValue());
186    }
187  }
188
189
190
191  /**
192   * Runs the tool with the provided set of command-line arguments.
193   *
194   * @param  out   The output stream used for standard output.  It may be
195   *               {@code null} if standard output should be suppressed.
196   * @param  err   The output stream used for standard error.  It may be
197   *               {@code null} if standard error should be suppressed.
198   * @param  args  The command-line arguments provided to this tool.
199   *
200   * @return  A result code with information about the processing performed.
201   *          Any result code other than {@link ResultCode#SUCCESS} indicates
202   *          that an error occurred.
203   */
204  public static ResultCode main(final OutputStream out, final OutputStream err,
205                                final String... args)
206  {
207    final SplitLDIF tool = new SplitLDIF(out, err);
208    return tool.runTool(args);
209  }
210
211
212
  /**
   * Creates a new instance of this tool with the provided information.
   *
   * @param  out  The output stream used for standard output.  It may be
   *              {@code null} if standard output should be suppressed.
   * @param  err  The output stream used for standard error.  It may be
   *              {@code null} if standard error should be suppressed.
   */
  public SplitLDIF(final OutputStream out, final OutputStream err)
  {
    // All output-stream handling is provided by the CommandLineTool
    // superclass.
    super(out, err);
  }
225
226
227
  /**
   * Retrieves the name of this tool as it would be invoked from the command
   * line.
   *
   * @return  The tool name "split-ldif".
   */
  @Override()
  public String getToolName()
  {
    return "split-ldif";
  }
236
237
238
  /**
   * Retrieves a human-readable description for this tool, obtained from the
   * tool's localized message catalog.
   *
   * @return  A human-readable description for this tool.
   */
  @Override()
  public String getToolDescription()
  {
    return INFO_SPLIT_LDIF_TOOL_DESCRIPTION.get();
  }
247
248
249
  /**
   * Retrieves the version string for this tool, which is the numeric version
   * of the LDAP SDK in which it ships.
   *
   * @return  The version string for this tool.
   */
  @Override()
  public String getToolVersion()
  {
    return Version.NUMERIC_VERSION_STRING;
  }
258
259
260
  /**
   * Indicates whether this tool supports being invoked in an interactive
   * mode in which the tool prompts for the arguments to use.
   *
   * @return  {@code true}, to indicate that this tool supports interactive
   *          mode.
   */
  @Override()
  public boolean supportsInteractiveMode()
  {
    return true;
  }
269
270
271
  /**
   * Indicates whether this tool defaults to launching in interactive mode.
   *
   * @return  {@code true}, to indicate that this tool defaults to interactive
   *          mode.
   */
  @Override()
  public boolean defaultsToInteractiveMode()
  {
    return true;
  }
280
281
282
  /**
   * Indicates whether this tool supports the use of a properties file for
   * specifying default values for arguments not given on the command line.
   *
   * @return  {@code true}, to indicate that properties files are supported.
   */
  @Override()
  public boolean supportsPropertiesFile()
  {
    return true;
  }
291
292
293
294  /**
295   * {@inheritDoc}
296   */
297  @Override()
298  public void addToolArguments(final ArgumentParser parser)
299         throws ArgumentException
300  {
301    // Add the global arguments.
302    sourceLDIF = new FileArgument('l', "sourceLDIF", true, 0, null,
303         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_LDIF.get(), true, false, true,
304         false);
305    sourceLDIF.addLongIdentifier("inputLDIF", true);
306    sourceLDIF.addLongIdentifier("source-ldif", true);
307    sourceLDIF.addLongIdentifier("input-ldif", true);
308    parser.addArgument(sourceLDIF);
309
310    sourceCompressed = new BooleanArgument('C', "sourceCompressed",
311         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_COMPRESSED.get());
312    sourceCompressed.addLongIdentifier("inputCompressed", true);
313    sourceCompressed.addLongIdentifier("source-compressed", true);
314    sourceCompressed.addLongIdentifier("input-compressed", true);
315    parser.addArgument(sourceCompressed);
316
317    targetLDIFBasePath = new FileArgument('o', "targetLDIFBasePath", false, 1,
318         null, INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_TARGET_LDIF_BASE.get(), false,
319         true, true, false);
320    targetLDIFBasePath.addLongIdentifier("outputLDIFBasePath", true);
321    targetLDIFBasePath.addLongIdentifier("target-ldif-base-path", true);
322    targetLDIFBasePath.addLongIdentifier("output-ldif-base-path", true);
323    parser.addArgument(targetLDIFBasePath);
324
325    compressTarget = new BooleanArgument('c', "compressTarget",
326         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_COMPRESS_TARGET.get());
327    compressTarget.addLongIdentifier("compressOutput", true);
328    compressTarget.addLongIdentifier("compress", true);
329    compressTarget.addLongIdentifier("compress-target", true);
330    compressTarget.addLongIdentifier("compress-output", true);
331    parser.addArgument(compressTarget);
332
333    encryptTarget = new BooleanArgument(null, "encryptTarget",
334         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_ENCRYPT_TARGET.get());
335    encryptTarget.addLongIdentifier("encryptOutput", true);
336    encryptTarget.addLongIdentifier("encrypt", true);
337    encryptTarget.addLongIdentifier("encrypt-target", true);
338    encryptTarget.addLongIdentifier("encrypt-output", true);
339    parser.addArgument(encryptTarget);
340
341    encryptionPassphraseFile = new FileArgument(null,
342         "encryptionPassphraseFile", false, 1, null,
343         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_ENCRYPT_PW_FILE.get(), true, true,
344         true, false);
345    encryptionPassphraseFile.addLongIdentifier("encryptionPasswordFile", true);
346    encryptionPassphraseFile.addLongIdentifier("encryption-passphrase-file",
347         true);
348    encryptionPassphraseFile.addLongIdentifier("encryption-password-file",
349         true);
350    parser.addArgument(encryptionPassphraseFile);
351
352    splitBaseDN = new DNArgument('b', "splitBaseDN", true, 1, null,
353         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SPLIT_BASE_DN.get());
354    splitBaseDN.addLongIdentifier("baseDN", true);
355    splitBaseDN.addLongIdentifier("split-base-dn", true);
356    splitBaseDN.addLongIdentifier("base-dn", true);
357    parser.addArgument(splitBaseDN);
358
359    addEntriesOutsideSplitBaseDNToAllSets = new BooleanArgument(null,
360         "addEntriesOutsideSplitBaseDNToAllSets", 1,
361         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_ALL_SETS.get());
362    addEntriesOutsideSplitBaseDNToAllSets.addLongIdentifier(
363         "add-entries-outside-split-base-dn-to-all-sets", true);
364    parser.addArgument(addEntriesOutsideSplitBaseDNToAllSets);
365
366    addEntriesOutsideSplitBaseDNToDedicatedSet = new BooleanArgument(null,
367         "addEntriesOutsideSplitBaseDNToDedicatedSet", 1,
368         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_DEDICATED_SET.get());
369    addEntriesOutsideSplitBaseDNToDedicatedSet.addLongIdentifier(
370         "add-entries-outside-split-base-dn-to-dedicated-set", true);
371    parser.addArgument(addEntriesOutsideSplitBaseDNToDedicatedSet);
372
373    schemaPath = new FileArgument(null, "schemaPath", false, 0, null,
374         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SCHEMA_PATH.get(), true, false, false,
375         false);
376    schemaPath.addLongIdentifier("schemaFile", true);
377    schemaPath.addLongIdentifier("schemaDirectory", true);
378    schemaPath.addLongIdentifier("schema-path", true);
379    schemaPath.addLongIdentifier("schema-file", true);
380    schemaPath.addLongIdentifier("schema-directory", true);
381    parser.addArgument(schemaPath);
382
383    numThreads = new IntegerArgument('t', "numThreads", false, 1, null,
384         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_NUM_THREADS.get(), 1,
385         Integer.MAX_VALUE, 1);
386    numThreads.addLongIdentifier("num-threads", true);
387    parser.addArgument(numThreads);
388
389
390    // Add the subcommand used to split entries using a hash on the RDN.
391    final ArgumentParser splitUsingHashOnRDNParser = new ArgumentParser(
392         "split-using-hash-on-rdn", INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get());
393
394    splitUsingHashOnRDNNumSets = new IntegerArgument(null, "numSets", true, 1,
395         null, INFO_SPLIT_LDIF_SC_HASH_ON_RDN_ARG_DESC_NUM_SETS.get(), 2,
396         Integer.MAX_VALUE);
397    splitUsingHashOnRDNNumSets.addLongIdentifier("num-sets", true);
398    splitUsingHashOnRDNParser.addArgument(splitUsingHashOnRDNNumSets);
399
400    final LinkedHashMap<String[],String> splitUsingHashOnRDNExamples =
401         new LinkedHashMap<String[],String>(1);
402    splitUsingHashOnRDNExamples.put(
403         new String[]
404         {
405           "split-using-hash-on-rdn",
406           "--sourceLDIF", "whole.ldif",
407           "--targetLDIFBasePath", "split.ldif",
408           "--splitBaseDN", "ou=People,dc=example,dc=com",
409           "--numSets", "4",
410           "--schemaPath", "config/schema",
411           "--addEntriesOutsideSplitBaseDNToAllSets"
412         },
413         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_EXAMPLE.get());
414
415    splitUsingHashOnRDN = new SubCommand("split-using-hash-on-rdn",
416         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get(), splitUsingHashOnRDNParser,
417         splitUsingHashOnRDNExamples);
418    splitUsingHashOnRDN.addName("hash-on-rdn", true);
419
420    parser.addSubCommand(splitUsingHashOnRDN);
421
422
423    // Add the subcommand used to split entries using a hash on a specified
424    // attribute.
425    final ArgumentParser splitUsingHashOnAttributeParser = new ArgumentParser(
426         "split-using-hash-on-attribute",
427         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get());
428
429    splitUsingHashOnAttributeAttributeName = new StringArgument(null,
430         "attributeName", true, 1, "{attr}",
431         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ATTR_NAME.get());
432    splitUsingHashOnAttributeAttributeName.addLongIdentifier("attribute-name",
433         true);
434    splitUsingHashOnAttributeParser.addArgument(
435         splitUsingHashOnAttributeAttributeName);
436
437    splitUsingHashOnAttributeNumSets = new IntegerArgument(null, "numSets",
438         true, 1, null, INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_NUM_SETS.get(),
439         2, Integer.MAX_VALUE);
440    splitUsingHashOnAttributeNumSets.addLongIdentifier("num-sets", true);
441    splitUsingHashOnAttributeParser.addArgument(
442         splitUsingHashOnAttributeNumSets);
443
444    splitUsingHashOnAttributeUseAllValues = new BooleanArgument(null,
445         "useAllValues", 1,
446         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ALL_VALUES.get());
447    splitUsingHashOnAttributeUseAllValues.addLongIdentifier("use-all-values",
448         true);
449    splitUsingHashOnAttributeParser.addArgument(
450         splitUsingHashOnAttributeUseAllValues);
451
452    splitUsingHashOnAttributeAssumeFlatDIT = new BooleanArgument(null,
453         "assumeFlatDIT", 1,
454         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ASSUME_FLAT_DIT.get());
455    splitUsingHashOnAttributeAssumeFlatDIT.addLongIdentifier("assume-flat-dit",
456         true);
457    splitUsingHashOnAttributeParser.addArgument(
458         splitUsingHashOnAttributeAssumeFlatDIT);
459
460    final LinkedHashMap<String[],String> splitUsingHashOnAttributeExamples =
461         new LinkedHashMap<String[],String>(1);
462    splitUsingHashOnAttributeExamples.put(
463         new String[]
464         {
465           "split-using-hash-on-attribute",
466           "--sourceLDIF", "whole.ldif",
467           "--targetLDIFBasePath", "split.ldif",
468           "--splitBaseDN", "ou=People,dc=example,dc=com",
469           "--attributeName", "uid",
470           "--numSets", "4",
471           "--schemaPath", "config/schema",
472           "--addEntriesOutsideSplitBaseDNToAllSets"
473         },
474         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_EXAMPLE.get());
475
476    splitUsingHashOnAttribute = new SubCommand("split-using-hash-on-attribute",
477         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get(),
478         splitUsingHashOnAttributeParser, splitUsingHashOnAttributeExamples);
479    splitUsingHashOnAttribute.addName("hash-on-attribute", true);
480
481    parser.addSubCommand(splitUsingHashOnAttribute);
482
483
484    // Add the subcommand used to split entries by selecting the set with the
485    // fewest entries.
486    final ArgumentParser splitUsingFewestEntriesParser = new ArgumentParser(
487         "split-using-fewest-entries",
488         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get());
489
490    splitUsingFewestEntriesNumSets = new IntegerArgument(null, "numSets",
491         true, 1, null,
492         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_NUM_SETS.get(),
493         2, Integer.MAX_VALUE);
494    splitUsingFewestEntriesNumSets.addLongIdentifier("num-sets", true);
495    splitUsingFewestEntriesParser.addArgument(splitUsingFewestEntriesNumSets);
496
497    splitUsingFewestEntriesAssumeFlatDIT = new BooleanArgument(null,
498         "assumeFlatDIT", 1,
499         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_ASSUME_FLAT_DIT.get());
500    splitUsingFewestEntriesAssumeFlatDIT.addLongIdentifier("assume-flat-dit",
501         true);
502    splitUsingFewestEntriesParser.addArgument(
503         splitUsingFewestEntriesAssumeFlatDIT);
504
505    final LinkedHashMap<String[],String> splitUsingFewestEntriesExamples =
506         new LinkedHashMap<String[],String>(1);
507    splitUsingFewestEntriesExamples.put(
508         new String[]
509         {
510           "split-using-fewest-entries",
511           "--sourceLDIF", "whole.ldif",
512           "--targetLDIFBasePath", "split.ldif",
513           "--splitBaseDN", "ou=People,dc=example,dc=com",
514           "--numSets", "4",
515           "--schemaPath", "config/schema",
516           "--addEntriesOutsideSplitBaseDNToAllSets"
517         },
518         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_EXAMPLE.get());
519
520    splitUsingFewestEntries = new SubCommand("split-using-fewest-entries",
521         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get(),
522         splitUsingFewestEntriesParser, splitUsingFewestEntriesExamples);
523    splitUsingFewestEntries.addName("fewest-entries", true);
524
525    parser.addSubCommand(splitUsingFewestEntries);
526
527
528    // Add the subcommand used to split entries by selecting the set based on a
529    // filter.
530    final ArgumentParser splitUsingFilterParser = new ArgumentParser(
531         "split-using-filter", INFO_SPLIT_LDIF_SC_FILTER_DESC.get());
532
533    splitUsingFilterFilter = new FilterArgument(null, "filter", true, 0, null,
534         INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_FILTER.get());
535    splitUsingFilterParser.addArgument(splitUsingFilterFilter);
536
537    splitUsingFilterAssumeFlatDIT = new BooleanArgument(null, "assumeFlatDIT",
538         1, INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_ASSUME_FLAT_DIT.get());
539    splitUsingFilterAssumeFlatDIT.addLongIdentifier("assume-flat-dit", true);
540    splitUsingFilterParser.addArgument(splitUsingFilterAssumeFlatDIT);
541
542    final LinkedHashMap<String[],String> splitUsingFilterExamples =
543         new LinkedHashMap<String[],String>(1);
544    splitUsingFilterExamples.put(
545         new String[]
546         {
547           "split-using-filter",
548           "--sourceLDIF", "whole.ldif",
549           "--targetLDIFBasePath", "split.ldif",
550           "--splitBaseDN", "ou=People,dc=example,dc=com",
551           "--filter", "(timeZone=Eastern)",
552           "--filter", "(timeZone=Central)",
553           "--filter", "(timeZone=Mountain)",
554           "--filter", "(timeZone=Pacific)",
555           "--schemaPath", "config/schema",
556           "--addEntriesOutsideSplitBaseDNToAllSets"
557         },
558         INFO_SPLIT_LDIF_SC_FILTER_EXAMPLE.get());
559
560    splitUsingFilter = new SubCommand("split-using-filter",
561         INFO_SPLIT_LDIF_SC_FILTER_DESC.get(),
562         splitUsingFilterParser, splitUsingFilterExamples);
563    splitUsingFilter.addName("filter", true);
564
565    parser.addSubCommand(splitUsingFilter);
566  }
567
568
569
570  /**
571   * {@inheritDoc}
572   */
573  @Override()
574  public void doExtendedArgumentValidation()
575         throws ArgumentException
576  {
577    // If multiple sourceLDIF values were provided, then a target LDIF base path
578    // must have been given.
579    final List<File> sourceLDIFValues = sourceLDIF.getValues();
580    if (sourceLDIFValues.size() > 1)
581    {
582      if (! targetLDIFBasePath.isPresent())
583      {
584        throw new ArgumentException(ERR_SPLIT_LDIF_NO_TARGET_BASE_PATH.get(
585             sourceLDIF.getIdentifierString(),
586             targetLDIFBasePath.getIdentifierString()));
587      }
588    }
589
590
591    // If the split-using-filter subcommand was provided, then at least two
592    // filters must have been provided, and none of the filters can be logically
593    // equivalent to any of the others.
594    if (splitUsingFilter.isPresent())
595    {
596      final List<Filter> filterList = splitUsingFilterFilter.getValues();
597      final Set<Filter> filterSet =
598           new LinkedHashSet<Filter>(filterList.size());
599      for (final Filter f : filterList)
600      {
601        if (filterSet.contains(f))
602        {
603          throw new ArgumentException(ERR_SPLIT_LDIF_NON_UNIQUE_FILTER.get(
604               splitUsingFilterFilter.getIdentifierString(), f.toString()));
605        }
606        else
607        {
608          filterSet.add(f);
609        }
610      }
611
612      if (filterSet.size() < 2)
613      {
614        throw new ArgumentException(ERR_SPLIT_LDIF_NOT_ENOUGH_FILTERS.get(
615             splitUsingFilter.getPrimaryName(),
616             splitUsingFilterFilter.getIdentifierString()));
617      }
618    }
619  }
620
621
622
623  /**
624   * {@inheritDoc}
625   */
626  @Override()
627  public ResultCode doToolProcessing()
628  {
629    // Get the schema to use during processing.
630    final Schema schema;
631    try
632    {
633      schema = getSchema();
634    }
635    catch (final LDAPException le)
636    {
637      wrapErr(0, MAX_OUTPUT_LINE_LENGTH, le.getMessage());
638      return le.getResultCode();
639    }
640
641
642    // If an encryption passphrase file is provided, then get the passphrase
643    // from it.
644    String encryptionPassphrase = null;
645    if (encryptionPassphraseFile.isPresent())
646    {
647      try
648      {
649        encryptionPassphrase = ToolUtils.readEncryptionPassphraseFromFile(
650             encryptionPassphraseFile.getValue());
651      }
652      catch (final LDAPException e)
653      {
654        Debug.debugException(e);
655        wrapErr(0, MAX_OUTPUT_LINE_LENGTH, e.getMessage());
656        return e.getResultCode();
657      }
658    }
659
660
661    // Figure out which subcommand was selected, and create the appropriate
662    // translator to use to perform the processing.
663    final SplitLDIFTranslator translator;
664    if (splitUsingHashOnRDN.isPresent())
665    {
666      translator = new SplitLDIFRDNHashTranslator(splitBaseDN.getValue(),
667           splitUsingHashOnRDNNumSets.getValue(),
668           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
669           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
670    }
671    else if (splitUsingHashOnAttribute.isPresent())
672    {
673      translator = new SplitLDIFAttributeHashTranslator(splitBaseDN.getValue(),
674           splitUsingHashOnAttributeNumSets.getValue(),
675           splitUsingHashOnAttributeAttributeName.getValue(),
676           splitUsingHashOnAttributeUseAllValues.isPresent(),
677           splitUsingHashOnAttributeAssumeFlatDIT.isPresent(),
678           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
679           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
680    }
681    else if (splitUsingFewestEntries.isPresent())
682    {
683      translator = new SplitLDIFFewestEntriesTranslator(splitBaseDN.getValue(),
684           splitUsingFewestEntriesNumSets.getValue(),
685           splitUsingFewestEntriesAssumeFlatDIT.isPresent(),
686           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
687           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
688    }
689    else if (splitUsingFilter.isPresent())
690    {
691      final List<Filter> filterList = splitUsingFilterFilter.getValues();
692      final LinkedHashSet<Filter> filterSet =
693           new LinkedHashSet<Filter>(filterList.size());
694      for (final Filter f : filterList)
695      {
696        filterSet.add(f);
697      }
698
699      translator = new SplitLDIFFilterTranslator(splitBaseDN.getValue(),
700           schema, filterSet, splitUsingFilterAssumeFlatDIT.isPresent(),
701           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
702           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
703    }
704    else
705    {
706      // This should never happen.
707      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
708           ERR_SPLIT_LDIF_CANNOT_DETERMINE_SPLIT_ALGORITHM.get(
709                splitUsingHashOnRDN.getPrimaryName() + ", " +
710                splitUsingHashOnAttribute.getPrimaryName() + ", " +
711                splitUsingFewestEntries.getPrimaryName() + ", " +
712                splitUsingFilter.getPrimaryName()));
713      return ResultCode.PARAM_ERROR;
714    }
715
716
717    // Create the LDIF reader.
718    final LDIFReader ldifReader;
719    try
720    {
721      final InputStream inputStream;
722      if (sourceLDIF.isPresent())
723      {
724        final ObjectPair<InputStream,String> p =
725             ToolUtils.getInputStreamForLDIFFiles(sourceLDIF.getValues(),
726                  encryptionPassphrase, getOut(), getErr());
727        inputStream = p.getFirst();
728        if ((encryptionPassphrase == null) && (p.getSecond() != null))
729        {
730          encryptionPassphrase = p.getSecond();
731        }
732      }
733      else
734      {
735        inputStream = System.in;
736      }
737
738      ldifReader = new LDIFReader(inputStream, numThreads.getValue(),
739           translator);
740      if (schema != null)
741      {
742        ldifReader.setSchema(schema);
743      }
744    }
745    catch (final Exception e)
746    {
747      Debug.debugException(e);
748      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
749           ERR_SPLIT_LDIF_ERROR_CREATING_LDIF_READER.get(
750                StaticUtils.getExceptionMessage(e)));
751      return ResultCode.LOCAL_ERROR;
752    }
753
754
755    // Iterate through and process all of the entries.
756    ResultCode resultCode = ResultCode.SUCCESS;
757    final LinkedHashMap<String,OutputStream> outputStreams =
758         new LinkedHashMap<String,OutputStream>(10);
759    try
760    {
761      final AtomicLong entriesRead = new AtomicLong(0L);
762      final AtomicLong entriesExcluded = new AtomicLong(0L);
763      final TreeMap<String,AtomicLong> fileCounts =
764           new TreeMap<String,AtomicLong>();
765
766readLoop:
767      while (true)
768      {
769        final SplitLDIFEntry entry;
770        try
771        {
772          entry = (SplitLDIFEntry) ldifReader.readEntry();
773        }
774        catch (final LDIFException le)
775        {
776          Debug.debugException(le);
777          resultCode = ResultCode.LOCAL_ERROR;
778
779          final File f = getOutputFile(SplitLDIFEntry.SET_NAME_ERRORS);
780          OutputStream s = outputStreams.get(SplitLDIFEntry.SET_NAME_ERRORS);
781          if (s == null)
782          {
783            try
784            {
785              s = new FileOutputStream(f);
786
787              if (encryptTarget.isPresent())
788              {
789                if (encryptionPassphrase == null)
790                {
791                  try
792                  {
793                    encryptionPassphrase =
794                         ToolUtils.promptForEncryptionPassphrase(false, true,
795                              getOut(), getErr());
796                  }
797                  catch (final LDAPException ex)
798                  {
799                    Debug.debugException(ex);
800                    wrapErr(0, MAX_OUTPUT_LINE_LENGTH, ex.getMessage());
801                    return ex.getResultCode();
802                  }
803                }
804
805                s = new PassphraseEncryptedOutputStream(encryptionPassphrase,
806                     s);
807              }
808
809              if (compressTarget.isPresent())
810              {
811                s = new GZIPOutputStream(s);
812              }
813
814              outputStreams.put(SplitLDIFEntry.SET_NAME_ERRORS, s);
815              fileCounts.put(SplitLDIFEntry.SET_NAME_ERRORS,
816                   new AtomicLong(0L));
817            }
818            catch (final Exception e)
819            {
820              Debug.debugException(e);
821              resultCode = ResultCode.LOCAL_ERROR;
822              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
823                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
824                        f.getAbsolutePath(),
825                        StaticUtils.getExceptionMessage(e)));
826              break readLoop;
827            }
828          }
829
830          final ByteStringBuffer buffer = new ByteStringBuffer();
831          buffer.append("# ");
832          buffer.append(le.getMessage());
833          buffer.append(StaticUtils.EOL_BYTES);
834
835          final List<String> dataLines = le.getDataLines();
836          if (dataLines != null)
837          {
838            for (final String dataLine : dataLines)
839            {
840              buffer.append(dataLine);
841              buffer.append(StaticUtils.EOL_BYTES);
842            }
843          }
844
845          buffer.append(StaticUtils.EOL_BYTES);
846
847          try
848          {
849            s.write(buffer.toByteArray());
850          }
851          catch (final Exception e)
852          {
853              Debug.debugException(e);
854              resultCode = ResultCode.LOCAL_ERROR;
855              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
856                   ERR_SPLIT_LDIF_ERROR_WRITING_ERROR_TO_FILE.get(
857                        le.getMessage(), f.getAbsolutePath(),
858                        StaticUtils.getExceptionMessage(e)));
859              break readLoop;
860          }
861
862          if (le.mayContinueReading())
863          {
864            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
865                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_RECOVERABLE.get(
866                      StaticUtils.getExceptionMessage(le)));
867            continue;
868          }
869          else
870          {
871            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
872                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_UNRECOVERABLE.get(
873                      StaticUtils.getExceptionMessage(le)));
874            break;
875          }
876        }
877        catch (final IOException ioe)
878        {
879          Debug.debugException(ioe);
880          resultCode = ResultCode.LOCAL_ERROR;
881          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
882               ERR_SPLIT_LDIF_IO_READ_ERROR.get(
883                    StaticUtils.getExceptionMessage(ioe)));
884          break;
885        }
886        catch (final Exception e)
887        {
888          Debug.debugException(e);
889          resultCode = ResultCode.LOCAL_ERROR;
890          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
891               ERR_SPLIT_LDIF_UNEXPECTED_READ_ERROR.get(
892                    StaticUtils.getExceptionMessage(e)));
893          break;
894        }
895
896        if (entry == null)
897        {
898          break;
899        }
900
901        final long readCount = entriesRead.incrementAndGet();
902        if ((readCount % 1000L) == 0)
903        {
904          // Even though we aren't done with this entry yet, we'll go ahead and
905          // log a progress message now because it's easier to do that now than
906          // to ensure that it's handled properly through all possible error
907          // conditions that need to be handled below.
908          wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
909               INFO_SPLIT_LDIF_PROGRESS.get(readCount));
910        }
911
912
913        // Get the set(s) to which the entry should be written.  If this is
914        // null (which could be the case as a result of a race condition when
915        // using multiple threads where processing for a child completes before
916        // processing for its parent, or as a result of a case in which a
917        // child is included without or before its parent), then try to see if
918        // we can get the sets by passing the entry through the translator.
919        Set<String> sets = entry.getSets();
920        byte[] ldifBytes = entry.getLDIFBytes();
921        if (sets == null)
922        {
923          try
924          {
925            sets = translator.translate(entry, 0L).getSets();
926          }
927          catch (final Exception e)
928          {
929            Debug.debugException(e);
930          }
931
932          if (sets == null)
933          {
934            final SplitLDIFEntry errorEntry =  translator.createEntry(entry,
935                 ERR_SPLIT_LDIF_ENTRY_WITHOUT_PARENT.get(
936                      entry.getDN(), splitBaseDN.getStringValue()),
937                 Collections.singleton(SplitLDIFEntry.SET_NAME_ERRORS));
938            ldifBytes = errorEntry.getLDIFBytes();
939            sets = errorEntry.getSets();
940          }
941        }
942
943
944        // If the entry shouldn't be written into any sets, then we don't need
945        // to do anything else.
946        if (sets.isEmpty())
947        {
948          entriesExcluded.incrementAndGet();
949          continue;
950        }
951
952
953        // Write the entry into each of the target sets, creating the output
954        // files if necessary.
955        for (final String set : sets)
956        {
957          if (set.equals(SplitLDIFEntry.SET_NAME_ERRORS))
958          {
959            // This indicates that an error was encountered during processing,
960            // so we'll update the result code to reflect that.
961            resultCode = ResultCode.LOCAL_ERROR;
962          }
963
964          final File f = getOutputFile(set);
965          OutputStream s = outputStreams.get(set);
966          if (s == null)
967          {
968            try
969            {
970              s = new FileOutputStream(f);
971
972              if (encryptTarget.isPresent())
973              {
974                if (encryptionPassphrase == null)
975                {
976                  try
977                  {
978                    encryptionPassphrase =
979                         ToolUtils.promptForEncryptionPassphrase(false, true,
980                              getOut(), getErr());
981                  }
982                  catch (final LDAPException ex)
983                  {
984                    Debug.debugException(ex);
985                    wrapErr(0, MAX_OUTPUT_LINE_LENGTH, ex.getMessage());
986                    return ex.getResultCode();
987                  }
988                }
989
990                s = new PassphraseEncryptedOutputStream(encryptionPassphrase,
991                     s);
992              }
993
994              if (compressTarget.isPresent())
995              {
996                s = new GZIPOutputStream(s);
997              }
998
999              outputStreams.put(set, s);
1000              fileCounts.put(set, new AtomicLong(0L));
1001            }
1002            catch (final Exception e)
1003            {
1004              Debug.debugException(e);
1005              resultCode = ResultCode.LOCAL_ERROR;
1006              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
1007                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
1008                        f.getAbsolutePath(),
1009                        StaticUtils.getExceptionMessage(e)));
1010              break readLoop;
1011            }
1012          }
1013
1014          try
1015          {
1016            s.write(ldifBytes);
1017          }
1018          catch (final Exception e)
1019          {
1020              Debug.debugException(e);
1021              resultCode = ResultCode.LOCAL_ERROR;
1022              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
1023                   ERR_SPLIT_LDIF_ERROR_WRITING_TO_FILE.get(
1024                        entry.getDN(), f.getAbsolutePath(),
1025                        StaticUtils.getExceptionMessage(e)));
1026              break readLoop;
1027          }
1028
1029          fileCounts.get(set).incrementAndGet();
1030        }
1031      }
1032
1033
1034      // Processing is complete.  Summarize the processing that was performed.
1035      final long finalReadCount = entriesRead.get();
1036      if (finalReadCount > 1000L)
1037      {
1038        out();
1039      }
1040
1041      wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
1042           INFO_SPLIT_LDIF_PROCESSING_COMPLETE.get(finalReadCount));
1043
1044      final long excludedCount = entriesExcluded.get();
1045      if (excludedCount > 0L)
1046      {
1047        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
1048             INFO_SPLIT_LDIF_EXCLUDED_COUNT.get(excludedCount));
1049      }
1050
1051      for (final Map.Entry<String,AtomicLong> e : fileCounts.entrySet())
1052      {
1053        final File f = getOutputFile(e.getKey());
1054        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
1055             INFO_SPLIT_LDIF_COUNT_TO_FILE.get(e.getValue().get(),
1056                  f.getName()));
1057      }
1058    }
1059    finally
1060    {
1061      try
1062      {
1063        ldifReader.close();
1064      }
1065      catch (final Exception e)
1066      {
1067        Debug.debugException(e);
1068      }
1069
1070      for (final Map.Entry<String,OutputStream> e : outputStreams.entrySet())
1071      {
1072        try
1073        {
1074          e.getValue().close();
1075        }
1076        catch (final Exception ex)
1077        {
1078          Debug.debugException(ex);
1079          resultCode = ResultCode.LOCAL_ERROR;
1080          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
1081               ERR_SPLIT_LDIF_ERROR_CLOSING_FILE.get(
1082                    getOutputFile(e.getKey()),
1083                    StaticUtils.getExceptionMessage(ex)));
1084        }
1085      }
1086    }
1087
1088    return resultCode;
1089  }
1090
1091
1092
1093  /**
1094   * Retrieves the schema that should be used for processing.
1095   *
1096   * @return  The schema that was created.
1097   *
1098   * @throws  LDAPException  If a problem is encountered while retrieving the
1099   *                         schema.
1100   */
1101  private Schema getSchema()
1102          throws LDAPException
1103  {
1104    // If any schema paths were specified, then load the schema only from those
1105    // paths.
1106    if (schemaPath.isPresent())
1107    {
1108      final ArrayList<File> schemaFiles = new ArrayList<File>(10);
1109      for (final File path : schemaPath.getValues())
1110      {
1111        if (path.isFile())
1112        {
1113          schemaFiles.add(path);
1114        }
1115        else
1116        {
1117          final TreeMap<String,File> fileMap = new TreeMap<String,File>();
1118          for (final File schemaDirFile : path.listFiles())
1119          {
1120            final String name = schemaDirFile.getName();
1121            if (schemaDirFile.isFile() && name.toLowerCase().endsWith(".ldif"))
1122            {
1123              fileMap.put(name, schemaDirFile);
1124            }
1125          }
1126          schemaFiles.addAll(fileMap.values());
1127        }
1128      }
1129
1130      if (schemaFiles.isEmpty())
1131      {
1132        throw new LDAPException(ResultCode.PARAM_ERROR,
1133             ERR_SPLIT_LDIF_NO_SCHEMA_FILES.get(
1134                  schemaPath.getIdentifierString()));
1135      }
1136      else
1137      {
1138        try
1139        {
1140          return Schema.getSchema(schemaFiles);
1141        }
1142        catch (final Exception e)
1143        {
1144          Debug.debugException(e);
1145          throw new LDAPException(ResultCode.LOCAL_ERROR,
1146               ERR_SPLIT_LDIF_ERROR_LOADING_SCHEMA.get(
1147                    StaticUtils.getExceptionMessage(e)));
1148        }
1149      }
1150    }
1151    else
1152    {
1153      // If the INSTANCE_ROOT environment variable is set and it refers to a
1154      // directory that has a config/schema subdirectory that has one or more
1155      // schema files in it, then read the schema from that directory.
1156      try
1157      {
1158        final String instanceRootStr = System.getenv("INSTANCE_ROOT");
1159        if (instanceRootStr != null)
1160        {
1161          final File instanceRoot = new File(instanceRootStr);
1162          final File configDir = new File(instanceRoot, "config");
1163          final File schemaDir = new File(configDir, "schema");
1164          if (schemaDir.exists())
1165          {
1166            final TreeMap<String,File> fileMap = new TreeMap<String,File>();
1167            for (final File schemaDirFile : schemaDir.listFiles())
1168            {
1169              final String name = schemaDirFile.getName();
1170              if (schemaDirFile.isFile() &&
1171                  name.toLowerCase().endsWith(".ldif"))
1172              {
1173                fileMap.put(name, schemaDirFile);
1174              }
1175            }
1176
1177            if (! fileMap.isEmpty())
1178            {
1179              return Schema.getSchema(new ArrayList<File>(fileMap.values()));
1180            }
1181          }
1182        }
1183      }
1184      catch (final Exception e)
1185      {
1186        Debug.debugException(e);
1187      }
1188    }
1189
1190
1191    // If we've gotten here, then just return null and the tool will try to use
1192    // the default standard schema.
1193    return null;
1194  }
1195
1196
1197
1198  /**
1199   * Retrieves a file object that refers to an output file with the provided
1200   * extension.
1201   *
1202   * @param  extension  The extension to use for the file.
1203   *
1204   * @return  A file object that refers to an output file with the provided
1205   *          extension.
1206   */
1207  private File getOutputFile(final String extension)
1208  {
1209    final File baseFile;
1210    if (targetLDIFBasePath.isPresent())
1211    {
1212      baseFile = targetLDIFBasePath.getValue();
1213    }
1214    else
1215    {
1216      baseFile = sourceLDIF.getValue();
1217    }
1218
1219    return new File(baseFile.getAbsolutePath() + extension);
1220  }
1221
1222
1223
1224  /**
1225   * {@inheritDoc}
1226   */
1227  @Override()
1228  public LinkedHashMap<String[],String> getExampleUsages()
1229  {
1230    final LinkedHashMap<String[],String> exampleMap =
1231         new LinkedHashMap<String[],String>(4);
1232
1233    for (final Map.Entry<String[],String> e :
1234         splitUsingHashOnRDN.getExampleUsages().entrySet())
1235    {
1236      exampleMap.put(e.getKey(), e.getValue());
1237    }
1238
1239    for (final Map.Entry<String[],String> e :
1240         splitUsingHashOnAttribute.getExampleUsages().entrySet())
1241    {
1242      exampleMap.put(e.getKey(), e.getValue());
1243    }
1244
1245    for (final Map.Entry<String[],String> e :
1246         splitUsingFewestEntries.getExampleUsages().entrySet())
1247    {
1248      exampleMap.put(e.getKey(), e.getValue());
1249    }
1250
1251    for (final Map.Entry<String[],String> e :
1252         splitUsingFilter.getExampleUsages().entrySet())
1253    {
1254      exampleMap.put(e.getKey(), e.getValue());
1255    }
1256
1257    return exampleMap;
1258  }
1259}