public final class CompressingTermVectorsWriter extends TermVectorsWriter
Modifier and Type | Class and Description |
---|---|
private class | CompressingTermVectorsWriter.DocData: a pending doc |
private class | CompressingTermVectorsWriter.FieldData: a pending field |
Modifier and Type | Field and Description |
---|---|
(package private) static boolean | BULK_MERGE_ENABLED |
(package private) static java.lang.String | BULK_MERGE_ENABLED_SYSPROP |
private int | chunkSize |
(package private) static java.lang.String | CODEC_SFX_DAT |
(package private) static java.lang.String | CODEC_SFX_IDX |
private CompressionMode | compressionMode |
private Compressor | compressor |
private CompressingTermVectorsWriter.DocData | curDoc |
private CompressingTermVectorsWriter.FieldData | curField |
(package private) static int | FLAGS_BITS |
private CompressingStoredFieldsIndexWriter | indexWriter |
private BytesRef | lastTerm |
private int[] | lengthsBuf |
(package private) static int | MAX_DOCUMENTS_PER_CHUNK |
private long | numChunks |
private long | numDirtyChunks |
private int | numDocs |
(package private) static int | OFFSETS |
(package private) static int | PACKED_BLOCK_SIZE |
private GrowableByteArrayDataOutput | payloadBytes |
private int[] | payloadLengthsBuf |
(package private) static int | PAYLOADS |
private java.util.Deque<CompressingTermVectorsWriter.DocData> | pendingDocs |
(package private) static int | POSITIONS |
private int[] | positionsBuf |
private java.lang.String | segment |
private int[] | startOffsetsBuf |
private GrowableByteArrayDataOutput | termSuffixes |
(package private) static java.lang.String | VECTORS_EXTENSION |
(package private) static java.lang.String | VECTORS_INDEX_EXTENSION |
private IndexOutput | vectorsStream |
(package private) static int | VERSION_CURRENT |
(package private) static int | VERSION_START |
private BlockPackedWriter | writer |
Constructor and Description |
---|
CompressingTermVectorsWriter(Directory directory, SegmentInfo si, java.lang.String segmentSuffix, IOContext context, java.lang.String formatName, CompressionMode compressionMode, int chunkSize, int blockSize): Sole constructor. |
Modifier and Type | Method and Description |
---|---|
private CompressingTermVectorsWriter.DocData | addDocData(int numVectorFields) |
void | addPosition(int position, int startOffset, int endOffset, BytesRef payload): Adds a term position and offsets. |
void | addProx(int numProx, DataInput positions, DataInput offsets): Called by IndexWriter when writing new segments. |
void | close() |
void | finish(FieldInfos fis, int numDocs): Called before TermVectorsWriter.close(), passing in the number of documents that were written. |
void | finishDocument(): Called after a doc and all its fields have been added. |
void | finishField(): Called after a field and all its terms have been added. |
private void | flush() |
private int[] | flushFieldNums(): Returns a sorted array containing unique field numbers. |
private void | flushFields(int totalFields, int[] fieldNums) |
private void | flushFlags(int totalFields, int[] fieldNums) |
private int | flushNumFields(int chunkDocs) |
private void | flushNumTerms(int totalFields) |
private void | flushOffsets(int[] fieldNums) |
private void | flushPayloadLengths() |
private void | flushPositions() |
private void | flushTermFreqs() |
private void | flushTermLengths() |
int | merge(MergeState mergeState): Merges in the term vectors from the readers in mergeState. |
void | startDocument(int numVectorFields): Called before writing the term vectors of the document. |
void | startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads): Called before writing the terms of the field. |
void | startTerm(BytesRef term, int freq): Adds a term and its term frequency freq. |
(package private) boolean | tooDirty(CompressingTermVectorsReader candidate): Returns true if we should recompress this reader, even though we could bulk merge compressed data. |
private boolean | triggerFlush() |

Methods inherited from class TermVectorsWriter: addAllDocVectors, finishTerm
static final int MAX_DOCUMENTS_PER_CHUNK
static final java.lang.String VECTORS_EXTENSION
static final java.lang.String VECTORS_INDEX_EXTENSION
static final java.lang.String CODEC_SFX_IDX
static final java.lang.String CODEC_SFX_DAT
static final int VERSION_START
static final int VERSION_CURRENT
static final int PACKED_BLOCK_SIZE
static final int POSITIONS
static final int OFFSETS
static final int PAYLOADS
static final int FLAGS_BITS
private final java.lang.String segment
private CompressingStoredFieldsIndexWriter indexWriter
private IndexOutput vectorsStream
private final CompressionMode compressionMode
private final Compressor compressor
private final int chunkSize
private long numChunks
private long numDirtyChunks
private int numDocs
private final java.util.Deque<CompressingTermVectorsWriter.DocData> pendingDocs
private CompressingTermVectorsWriter.DocData curDoc
private CompressingTermVectorsWriter.FieldData curField
private final BytesRef lastTerm
private int[] positionsBuf
private int[] startOffsetsBuf
private int[] lengthsBuf
private int[] payloadLengthsBuf
private final GrowableByteArrayDataOutput termSuffixes
private final GrowableByteArrayDataOutput payloadBytes
private final BlockPackedWriter writer
static final java.lang.String BULK_MERGE_ENABLED_SYSPROP
static final boolean BULK_MERGE_ENABLED
public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, java.lang.String segmentSuffix, IOContext context, java.lang.String formatName, CompressionMode compressionMode, int chunkSize, int blockSize) throws java.io.IOException
Sole constructor.
Throws: java.io.IOException
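A hedged sketch of how a codec's term vectors format might construct this writer; the format name, chunk size, and block size below are placeholder values chosen for illustration, not the constants used by Lucene's shipped formats.

```java
import java.io.IOException;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

// Illustrative only: a term vectors format would typically create the writer
// like this from its vectorsWriter(...) hook. formatName, chunkSize and
// blockSize are placeholder values.
final class WriterConstructionSketch {
  static TermVectorsWriter open(Directory dir, SegmentInfo si, IOContext context) throws IOException {
    return new CompressingTermVectorsWriter(
        dir, si, /*segmentSuffix=*/"", context,
        /*formatName=*/"ExampleTermVectorsData",
        CompressionMode.FAST,
        /*chunkSize=*/1 << 12,
        /*blockSize=*/1024);
  }
}
```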
private CompressingTermVectorsWriter.DocData addDocData(int numVectorFields)
public void close() throws java.io.IOException
Specified by: close in interface java.io.Closeable
Specified by: close in interface java.lang.AutoCloseable
Specified by: close in class TermVectorsWriter
Throws: java.io.IOException
public void startDocument(int numVectorFields) throws java.io.IOException
Description copied from class: TermVectorsWriter
Called before writing the term vectors of the document. TermVectorsWriter.startField(FieldInfo, int, boolean, boolean, boolean) will be called numVectorFields times. Note that if term vectors are enabled, this is called even if the document has no vector fields; in this case numVectorFields will be zero.
Specified by: startDocument in class TermVectorsWriter
Throws: java.io.IOException
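The calling protocol is easier to see in code. The sketch below is not part of Lucene; the driver class, the field handle, and the token positions and offsets are assumptions. It replays the documented sequence for two documents, including the zero-field case described above.

```java
import java.io.IOException;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.util.BytesRef;

// Illustrative driver, not part of Lucene: replays the documented call order
// for two documents, the second of which has no vector fields at all.
final class StartDocumentSketch {
  static void writeTwoDocs(TermVectorsWriter tvw, FieldInfo vectorField) throws IOException {
    // Document 0: one field with a single term "lucene" occurring once.
    tvw.startDocument(1);
    tvw.startField(vectorField, 1, /*positions=*/true, /*offsets=*/true, /*payloads=*/false);
    tvw.startTerm(new BytesRef("lucene"), 1);
    tvw.addPosition(0, 0, 6, null);
    tvw.finishTerm();
    tvw.finishField();
    tvw.finishDocument();

    // Document 1: term vectors are enabled for the segment, but this document
    // has no vector fields, so startDocument is still called, with zero.
    tvw.startDocument(0);
    tvw.finishDocument();
  }
}
```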
public void finishDocument() throws java.io.IOException
Description copied from class: TermVectorsWriter
Called after a doc and all its fields have been added.
Overrides: finishDocument in class TermVectorsWriter
Throws: java.io.IOException
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws java.io.IOException
Description copied from class: TermVectorsWriter
Called before writing the terms of the field. TermVectorsWriter.startTerm(BytesRef, int) will be called numTerms times.
Specified by: startField in class TermVectorsWriter
Throws: java.io.IOException
public void finishField() throws java.io.IOException
Description copied from class: TermVectorsWriter
Called after a field and all its terms have been added.
Overrides: finishField in class TermVectorsWriter
Throws: java.io.IOException
public void startTerm(BytesRef term, int freq) throws java.io.IOException
Description copied from class: TermVectorsWriter
Adds a term and its term frequency freq. If this field has positions and/or offsets enabled, then TermVectorsWriter.addPosition(int, int, int, BytesRef) will be called freq times respectively.
Specified by: startTerm in class TermVectorsWriter
Throws: java.io.IOException
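As a narrower illustration of the startTerm contract, the sketch below writes a single term that occurs twice, so addPosition is invoked exactly freq times; the term text, positions, and offsets are made up, and startField is assumed to have been called already.

```java
import java.io.IOException;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.util.BytesRef;

// Illustrative only: one term with freq = 2, hence exactly two addPosition calls.
final class StartTermSketch {
  static void writeTerm(TermVectorsWriter tvw) throws IOException {
    BytesRef term = new BytesRef("vector");
    int[] positions = {3, 17};       // token positions of the two occurrences
    int[] startOffsets = {12, 80};   // character offsets, purely made up
    tvw.startTerm(term, positions.length);
    for (int i = 0; i < positions.length; i++) {
      tvw.addPosition(positions[i], startOffsets[i], startOffsets[i] + term.length, null);
    }
    tvw.finishTerm();
  }
}
```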
public void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws java.io.IOException
Description copied from class: TermVectorsWriter
Adds a term position and offsets.
Specified by: addPosition in class TermVectorsWriter
Throws: java.io.IOException
private boolean triggerFlush()
private void flush() throws java.io.IOException
Throws: java.io.IOException
private int flushNumFields(int chunkDocs) throws java.io.IOException
Throws: java.io.IOException
private int[] flushFieldNums() throws java.io.IOException
Throws: java.io.IOException
private void flushFields(int totalFields, int[] fieldNums) throws java.io.IOException
Throws: java.io.IOException
private void flushFlags(int totalFields, int[] fieldNums) throws java.io.IOException
Throws: java.io.IOException
private void flushNumTerms(int totalFields) throws java.io.IOException
Throws: java.io.IOException
private void flushTermLengths() throws java.io.IOException
Throws: java.io.IOException
private void flushTermFreqs() throws java.io.IOException
Throws: java.io.IOException
private void flushPositions() throws java.io.IOException
Throws: java.io.IOException
private void flushOffsets(int[] fieldNums) throws java.io.IOException
Throws: java.io.IOException
private void flushPayloadLengths() throws java.io.IOException
Throws: java.io.IOException
public void finish(FieldInfos fis, int numDocs) throws java.io.IOException
Description copied from class: TermVectorsWriter
Called before TermVectorsWriter.close(), passing in the number of documents that were written. Note that this is intentionally redundant (equivalent to the number of calls to TermVectorsWriter.startDocument(int)), but a Codec should check that this is the case to detect the JRE bug described in LUCENE-1282.
Specified by: finish in class TermVectorsWriter
Throws: java.io.IOException
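A minimal sketch of the consistency check this contract asks a codec to perform; the counter name and exception choice are illustrative, not taken from the Lucene sources.

```java
// Illustrative only: the kind of check the finish(...) contract describes.
// docsStarted would be incremented on every startDocument call.
static void checkDocCount(int docsStarted, int numDocsPassedToFinish) {
  if (docsStarted != numDocsPassedToFinish) {
    throw new IllegalStateException(
        "finish() was told " + numDocsPassedToFinish
            + " docs, but startDocument() was called " + docsStarted + " times");
  }
}
```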
public void addProx(int numProx, DataInput positions, DataInput offsets) throws java.io.IOException
Description copied from class: TermVectorsWriter
Called by IndexWriter when writing new segments. This is an expert API that allows the codec to consume positions and offsets directly from the indexer. The default implementation calls TermVectorsWriter.addPosition(int, int, int, BytesRef), but subclasses can override this if they want to efficiently write all the positions, then all the offsets, for example.
NOTE: This API is extremely expert and subject to change or removal!
Overrides: addProx in class TermVectorsWriter
Throws: java.io.IOException
public int merge(MergeState mergeState) throws java.io.IOException
Description copied from class: TermVectorsWriter
Merges in the term vectors from the readers in mergeState. The default implementation skips over deleted documents, and uses TermVectorsWriter.startDocument(int), TermVectorsWriter.startField(FieldInfo, int, boolean, boolean, boolean), TermVectorsWriter.startTerm(BytesRef, int), TermVectorsWriter.addPosition(int, int, int, BytesRef), and TermVectorsWriter.finish(FieldInfos, int), returning the number of documents that were written. Implementations can override this method for more sophisticated merging (bulk-byte copying, etc.).
Overrides: merge in class TermVectorsWriter
Throws: java.io.IOException
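To make the skip-over-deleted-documents behaviour concrete, the helper below counts the documents a doc-by-doc merge would replay. It assumes the maxDocs and liveDocs arrays exposed by MergeState in recent releases, and it is not the bulk-copy path this writer prefers when chunks can be copied as-is.

```java
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.Bits;

// Illustrative only: how many documents a doc-by-doc merge would replay,
// i.e. the live (non-deleted) documents across all segments being merged.
// Assumes MergeState exposes maxDocs and liveDocs arrays, one entry per reader.
final class MergeLiveDocCount {
  static int countLiveDocs(MergeState mergeState) {
    int docCount = 0;
    for (int i = 0; i < mergeState.maxDocs.length; i++) {
      Bits liveDocs = mergeState.liveDocs[i];        // null when nothing is deleted
      for (int doc = 0; doc < mergeState.maxDocs[i]; doc++) {
        if (liveDocs == null || liveDocs.get(doc)) {
          docCount++;
        }
      }
    }
    return docCount;
  }
}
```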
boolean tooDirty(CompressingTermVectorsReader candidate)
Returns true if we should recompress this reader, even though we could bulk merge compressed data. The last chunk written for a segment is typically incomplete, so without recompressing, in some worst-case situations (e.g. frequent reopens with tiny flushes), the compression ratio can degrade over time. This is a safety switch.
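The exact thresholds live in the Lucene sources; the sketch below only shows the shape of such a safety switch, with made-up limits (an absolute cap on dirty chunks plus a relative share of all chunks).

```java
// Illustrative only: the shape of a "too dirty" heuristic. The limits used
// here (1024 dirty chunks, or more than 1% of all chunks being dirty) are
// examples, not the constants Lucene actually applies.
static boolean looksTooDirty(long numChunks, long numDirtyChunks) {
  return numDirtyChunks > 1024 || numDirtyChunks * 100 > numChunks;
}
```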