public final class FST<T> extends java.lang.Object implements Accountable
The format is similar to what's used by Morfologik (https://github.com/morfologik/morfologik-stemming).
See the package
documentation
for some simple examples.
Modifier and Type | Class and Description |
---|---|
static class |
FST.Arc<T>
Represents a single arc.
|
static class |
FST.BytesReader
Reads bytes stored in an FST.
|
static class |
FST.INPUT_TYPE
Specifies allowed range of each int input label for
this FST.
|
Modifier and Type | Field and Description |
---|---|
private static long |
ARC_SHALLOW_RAM_BYTES_USED |
private static byte |
ARCS_AS_FIXED_ARRAY |
private static long |
BASE_RAM_BYTES_USED |
(package private) static int |
BIT_ARC_HAS_FINAL_OUTPUT |
static int |
BIT_ARC_HAS_OUTPUT
This flag is set if the arc has an output.
|
(package private) static int |
BIT_FINAL_ARC |
(package private) static int |
BIT_LAST_ARC |
(package private) static int |
BIT_STOP_NODE |
(package private) static int |
BIT_TARGET_NEXT |
(package private) BytesStore |
bytes
A
BytesStore , used during building, or during reading when
the FST is very large (more than 1 GB). |
private int |
cachedArcsBytesUsed |
private FST.Arc<T>[] |
cachedRootArcs |
static int |
DEFAULT_MAX_BLOCK_BITS |
(package private) T |
emptyOutput |
static int |
END_LABEL
If arc has this label then that arc is final/accepted
|
private static java.lang.String |
FILE_FORMAT_NAME |
private static long |
FINAL_END_NODE |
(package private) static int |
FIXED_ARRAY_NUM_ARCS_DEEP |
(package private) static int |
FIXED_ARRAY_NUM_ARCS_SHALLOW |
(package private) static int |
FIXED_ARRAY_SHALLOW_DISTANCE |
private FSTStore |
fstStore |
FST.INPUT_TYPE |
inputType |
private static long |
NON_FINAL_END_NODE |
Outputs<T> |
outputs |
private long |
startNode |
private int |
version |
private static int |
VERSION_CURRENT |
private static int |
VERSION_START |
Constructor and Description |
---|
FST(DataInput in,
Outputs<T> outputs)
Load a previously saved FST.
|
FST(DataInput in,
Outputs<T> outputs,
FSTStore fstStore)
Load a previously saved FST; maxBlockBits allows you to
control the size of the byte[] pages used to hold the FST bytes.
|
FST(FST.INPUT_TYPE inputType,
Outputs<T> outputs,
int bytesPageBits) |
Modifier and Type | Method and Description |
---|---|
(package private) long |
addNode(Builder<T> builder,
Builder.UnCompiledNode<T> nodeIn) |
private boolean |
assertRootCachedArc(int label,
FST.Arc<T> cachedArc) |
private void |
cacheRootArcs() |
FST.Arc<T> |
findTargetArc(int labelToMatch,
FST.Arc<T> follow,
FST.Arc<T> arc,
FST.BytesReader in)
Finds an arc leaving the incoming arc, replacing the arc in place.
|
private FST.Arc<T> |
findTargetArc(int labelToMatch,
FST.Arc<T> follow,
FST.Arc<T> arc,
FST.BytesReader in,
boolean useRootArcCache)
Finds an arc leaving the incoming arc, replacing the arc in place.
|
(package private) void |
finish(long newStartNode) |
private static boolean |
flag(int flags,
int bit) |
FST.BytesReader |
getBytesReader()
Returns a
FST.BytesReader for this FST, positioned at
position 0. |
T |
getEmptyOutput() |
FST.Arc<T> |
getFirstArc(FST.Arc<T> arc)
Fills virtual 'start' arc, ie, an empty incoming arc to
the FST's start node
|
FST.INPUT_TYPE |
getInputType() |
(package private) boolean |
isExpandedTarget(FST.Arc<T> follow,
FST.BytesReader in)
Checks if
arc 's target state is in expanded (or vector) format. |
long |
ramBytesUsed()
Return the memory usage of this object in bytes.
|
private long |
ramBytesUsed(FST.Arc<T>[] arcs) |
static <T> FST<T> |
read(java.nio.file.Path path,
Outputs<T> outputs)
Reads an automaton from a file.
|
FST.Arc<T> |
readFirstRealTargetArc(long node,
FST.Arc<T> arc,
FST.BytesReader in) |
FST.Arc<T> |
readFirstTargetArc(FST.Arc<T> follow,
FST.Arc<T> arc,
FST.BytesReader in)
Follow the
follow arc and read the first arc of its target;
this changes the provided arc (2nd arg) in-place and returns
it. |
int |
readLabel(DataInput in)
Reads one BYTE1/2/4 label from the provided
DataInput . |
FST.Arc<T> |
readLastTargetArc(FST.Arc<T> follow,
FST.Arc<T> arc,
FST.BytesReader in)
Follows the
follow arc and reads the last
arc of its target; this changes the provided
arc (2nd arg) in-place and returns it. |
FST.Arc<T> |
readNextArc(FST.Arc<T> arc,
FST.BytesReader in)
In-place read; returns the arc.
|
int |
readNextArcLabel(FST.Arc<T> arc,
FST.BytesReader in)
Peeks at next arc's label; does not alter arc.
|
FST.Arc<T> |
readNextRealArc(FST.Arc<T> arc,
FST.BytesReader in)
Never returns null, but you should never call this if
arc.isLast() is true.
|
private long |
readUnpackedNodeTarget(FST.BytesReader in) |
void |
save(DataOutput out) |
void |
save(java.nio.file.Path path)
Writes an automaton to a file.
|
private void |
seekToNextNode(FST.BytesReader in) |
(package private) void |
setEmptyOutput(T v) |
private boolean |
shouldExpand(Builder<T> builder,
Builder.UnCompiledNode<T> node)
Nodes will be expanded if their depth (distance from the root node) is
<= this value and their number of arcs is >=
FIXED_ARRAY_NUM_ARCS_SHALLOW . |
static <T> boolean |
targetHasArcs(FST.Arc<T> arc)
returns true if the node at this address has any
outgoing arcs
|
java.lang.String |
toString() |
private void |
writeLabel(DataOutput out,
int v) |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
getChildResources
private static final long BASE_RAM_BYTES_USED
private static final long ARC_SHALLOW_RAM_BYTES_USED
static final int BIT_FINAL_ARC
static final int BIT_LAST_ARC
static final int BIT_TARGET_NEXT
static final int BIT_STOP_NODE
public static final int BIT_ARC_HAS_OUTPUT
static final int BIT_ARC_HAS_FINAL_OUTPUT
private static final byte ARCS_AS_FIXED_ARRAY
static final int FIXED_ARRAY_SHALLOW_DISTANCE
static final int FIXED_ARRAY_NUM_ARCS_SHALLOW
static final int FIXED_ARRAY_NUM_ARCS_DEEP
private static final java.lang.String FILE_FORMAT_NAME
private static final int VERSION_START
private static final int VERSION_CURRENT
private static final long FINAL_END_NODE
private static final long NON_FINAL_END_NODE
public static final int END_LABEL
public final FST.INPUT_TYPE inputType
T emptyOutput
final BytesStore bytes
BytesStore
, used during building, or during reading when
the FST is very large (more than 1 GB). If the FST is less than 1
GB then bytesArray is set instead.private final FSTStore fstStore
private long startNode
private final int version
public static final int DEFAULT_MAX_BLOCK_BITS
private int cachedArcsBytesUsed
FST(FST.INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits)
public FST(DataInput in, Outputs<T> outputs) throws java.io.IOException
java.io.IOException
private static boolean flag(int flags, int bit)
public FST.INPUT_TYPE getInputType()
public long ramBytesUsed()
Accountable
ramBytesUsed
in interface Accountable
public java.lang.String toString()
toString
in class java.lang.Object
void finish(long newStartNode) throws java.io.IOException
java.io.IOException
private void cacheRootArcs() throws java.io.IOException
java.io.IOException
public T getEmptyOutput()
void setEmptyOutput(T v) throws java.io.IOException
java.io.IOException
public void save(DataOutput out) throws java.io.IOException
java.io.IOException
public void save(java.nio.file.Path path) throws java.io.IOException
java.io.IOException
public static <T> FST<T> read(java.nio.file.Path path, Outputs<T> outputs) throws java.io.IOException
java.io.IOException
private void writeLabel(DataOutput out, int v) throws java.io.IOException
java.io.IOException
public int readLabel(DataInput in) throws java.io.IOException
DataInput
.java.io.IOException
public static <T> boolean targetHasArcs(FST.Arc<T> arc)
long addNode(Builder<T> builder, Builder.UnCompiledNode<T> nodeIn) throws java.io.IOException
java.io.IOException
public FST.Arc<T> getFirstArc(FST.Arc<T> arc)
public FST.Arc<T> readLastTargetArc(FST.Arc<T> follow, FST.Arc<T> arc, FST.BytesReader in) throws java.io.IOException
follow
arc and reads the last
arc of its target; this changes the provided
arc
(2nd arg) in-place and returns it.arc
).java.io.IOException
private long readUnpackedNodeTarget(FST.BytesReader in) throws java.io.IOException
java.io.IOException
public FST.Arc<T> readFirstTargetArc(FST.Arc<T> follow, FST.Arc<T> arc, FST.BytesReader in) throws java.io.IOException
follow
arc and read the first arc of its target;
this changes the provided arc
(2nd arg) in-place and returns
it.arc
).java.io.IOException
public FST.Arc<T> readFirstRealTargetArc(long node, FST.Arc<T> arc, FST.BytesReader in) throws java.io.IOException
java.io.IOException
boolean isExpandedTarget(FST.Arc<T> follow, FST.BytesReader in) throws java.io.IOException
arc
's target state is in expanded (or vector) format.true
if arc
points to a state in an
expanded array format.java.io.IOException
public FST.Arc<T> readNextArc(FST.Arc<T> arc, FST.BytesReader in) throws java.io.IOException
java.io.IOException
public int readNextArcLabel(FST.Arc<T> arc, FST.BytesReader in) throws java.io.IOException
java.io.IOException
public FST.Arc<T> readNextRealArc(FST.Arc<T> arc, FST.BytesReader in) throws java.io.IOException
java.io.IOException
private boolean assertRootCachedArc(int label, FST.Arc<T> cachedArc) throws java.io.IOException
java.io.IOException
public FST.Arc<T> findTargetArc(int labelToMatch, FST.Arc<T> follow, FST.Arc<T> arc, FST.BytesReader in) throws java.io.IOException
java.io.IOException
private FST.Arc<T> findTargetArc(int labelToMatch, FST.Arc<T> follow, FST.Arc<T> arc, FST.BytesReader in, boolean useRootArcCache) throws java.io.IOException
java.io.IOException
private void seekToNextNode(FST.BytesReader in) throws java.io.IOException
java.io.IOException
private boolean shouldExpand(Builder<T> builder, Builder.UnCompiledNode<T> node)
FIXED_ARRAY_NUM_ARCS_SHALLOW
.
Fixed array consumes more RAM but enables binary search on the arcs (instead of a linear scan) on lookup by arc label.
true
if node
should be stored in an
expanded (array) form.FIXED_ARRAY_NUM_ARCS_DEEP
,
Builder.UnCompiledNode.depth
public FST.BytesReader getBytesReader()
FST.BytesReader
for this FST, positioned at
position 0.