00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052 #include <stdio.h>
00053 #include <assert.h>
00054 #include <string.h>
00055
00056
00057 #include <sphinx_config.h>
00058 #include <err.h>
00059 #include <ckd_alloc.h>
00060
00061
00062 #include "kdtree.h"
00063 #include "s2_semi_mgau.h"
00064
00065 #define KDTREE_VERSION 1
00066
00067 static int32
00068 read_tree_int(FILE * fp, const char *name, int32 * out, int32 optional)
00069 {
00070 char line[256];
00071 int n;
00072
00073 n = fscanf(fp, "%255s %d", line, out);
00074 if ((optional == 0 && n != 2) || strcmp(line, name)) {
00075 E_ERROR("%s not found: %d %s %d\n", name, n, line, out);
00076 return -1;
00077 }
00078 return n;
00079 }
00080
00081 static int32
00082 read_tree_float(FILE * fp, const char *name, float32 * out, int32 optional)
00083 {
00084 char line[256];
00085 int n;
00086
00087 n = fscanf(fp, "%255s %f", line, out);
00088 if ((optional == 0 && n != 2) || strcmp(line, name)) {
00089 E_ERROR("%s not found: %d %s %f\n", name, n, line, out);
00090 return -1;
00091 }
00092 return n;
00093 }
00094
00095 static int32
00096 read_bbi_list(FILE * fp, kd_tree_node_t * node, int32 maxbbi)
00097 {
00098 uint8 bbi_list[256];
00099 int bbi, nbbi = 0, nr;
00100
00101 if (maxbbi == -1)
00102 maxbbi = 256;
00103 if ((nr = read_tree_int(fp, "bbi", &bbi, TRUE)) < 0)
00104 return -1;
00105 if (nr > 1) {
00106 if (bbi >= 256) {
00107 E_ERROR("BBI Gaussian %d out of range! %d\n", bbi);
00108 return -1;
00109 }
00110 bbi_list[0] = bbi;
00111 nbbi = 1;
00112 while (fscanf(fp, "%d", &bbi) > 0) {
00113 if (feof(fp))
00114 break;
00115 if (bbi >= 256) {
00116 E_ERROR("BBI Gaussian %d out of range!\n", bbi);
00117 return -1;
00118 }
00119 if (nbbi < maxbbi)
00120 bbi_list[nbbi++] = bbi;
00121 }
00122 }
00123 if (node == NULL)
00124 return 0;
00125 if (nbbi > maxbbi)
00126 nbbi = maxbbi;
00127 node->n_bbi = nbbi;
00128 if (nbbi) {
00129 node->bbi = ckd_calloc(node->n_bbi, sizeof(*node->bbi));
00130 memcpy(node->bbi, bbi_list, node->n_bbi * sizeof(*node->bbi));
00131 }
00132 return 0;
00133 }
00134
00135 static int32
00136 read_kd_nodes(FILE * fp, kd_tree_t * tree, uint32 maxdepth, int32 maxbbi)
00137 {
00138 uint32 i, j, in, out;
00139 int32 ilevel, olevel;
00140
00141
00142 if (maxdepth == 0 || maxdepth > tree->n_level)
00143 maxdepth = tree->n_level;
00144 in = (1 << tree->n_level) - 1;
00145 out = (1 << maxdepth) - 1;
00146 tree->nodes = ckd_calloc(out, sizeof(kd_tree_node_t));
00147
00148
00149 for (j = i = 0; i < in; ++i) {
00150 float32 split_plane;
00151 int32 split_comp;
00152
00153 if (read_tree_int(fp, "NODE", &ilevel, FALSE) < 0)
00154 break;
00155 if (read_tree_int(fp, "split_comp", &split_comp, FALSE) < 0)
00156 return -1;
00157 if (read_tree_float(fp, "split_plane", &split_plane, FALSE) < 0)
00158 return -1;
00159 olevel = ilevel - (tree->n_level - maxdepth);
00160 if (olevel > 0) {
00161
00162 assert(j < out);
00163 tree->nodes[j].split_comp = split_comp;
00164 tree->nodes[j].split_plane = FLOAT2MFCC(split_plane);
00165
00166 if (olevel == 1) {
00167 if (read_bbi_list(fp, tree->nodes + j, maxbbi) < 0)
00168 return -1;
00169 }
00170 else {
00171 if (read_bbi_list(fp, NULL, 0) < 0)
00172 return -1;
00173 }
00174
00175 if (olevel > 1) {
00176 tree->nodes[j].left = j + 1;
00177 tree->nodes[j].right = j + (1 << (olevel - 1));
00178 }
00179 ++j;
00180 }
00181 else {
00182 if (read_bbi_list(fp, NULL, 0) < 0)
00183 return -1;
00184 }
00185 }
00186 E_INFO("Read %d nodes\n", j);
00187
00188 return 0;
00189 }
00190
00191 int32
00192 read_kd_trees(const char *infile, kd_tree_t *** out_trees,
00193 uint32 * out_n_trees, uint32 maxdepth, int32 maxbbi)
00194 {
00195 FILE *fp;
00196 char line[256];
00197 int n, version;
00198 uint32 i;
00199
00200 if ((fp = fopen(infile, "r")) == NULL) {
00201 E_ERROR("Failed to open %s", infile);
00202 return -1;
00203 }
00204 n = fscanf(fp, "%256s", line);
00205 if (n != 1 || strcmp(line, "KD-TREES")) {
00206 E_ERROR("%s doesn't appear to be a kd-tree file: %s\n",
00207 infile, line);
00208 return -1;
00209 }
00210 n = fscanf(fp, "%256s %d", line, &version);
00211 if (n != 2 || strcmp(line, "version") || version > KDTREE_VERSION) {
00212 E_ERROR("Unsupported kd-tree file format %s %d\n", line, version);
00213 return -1;
00214 }
00215 if (read_tree_int(fp, "n_trees", (int32 *)out_n_trees, FALSE) < 0)
00216 return -1;
00217
00218 *out_trees = ckd_calloc(*out_n_trees, sizeof(kd_tree_t **));
00219 for (i = 0; i < *out_n_trees; ++i) {
00220 kd_tree_t *tree;
00221 uint32 n_density;
00222 float32 threshold;
00223
00224 if (read_tree_int(fp, "TREE", &n, FALSE) < 0)
00225 goto error_out;
00226 if (n != i) {
00227 E_ERROR("Tree number %d out of sequence\n", n);
00228 goto error_out;
00229 }
00230
00231 E_INFO("Reading tree for feature %d\n", i);
00232 (*out_trees)[i] = tree = ckd_calloc(1, sizeof(*tree));
00233 if (read_tree_int(fp, "n_density", (int32 *)&n_density, FALSE) < 0)
00234 goto error_out;
00235 if (n_density > 256) {
00236 E_ERROR("Number of densities (%d) must be <= 256!\n", n_density);
00237 goto error_out;
00238 }
00239 if (read_tree_int(fp, "n_comp", (int32 *)&tree->n_comp, FALSE) < 0)
00240 goto error_out;
00241 if (read_tree_int(fp, "n_level", (int32 *)&tree->n_level, FALSE) < 0)
00242 goto error_out;
00243 if (tree->n_level > 16) {
00244 E_ERROR("Depth of tree (%d) must be < 16!\n", tree->n_level);
00245 goto error_out;
00246 }
00247 if (read_tree_float(fp, "threshold", &threshold, FALSE) < 0)
00248 goto error_out;
00249 E_INFO("n_density %d n_comp %d n_level %d threshold %f\n",
00250 n_density, tree->n_comp, tree->n_level, threshold);
00251 if (read_kd_nodes(fp, tree, maxdepth, maxbbi) < 0)
00252 goto error_out;
00253 if (maxdepth)
00254 tree->n_level = maxdepth;
00255 }
00256 fclose(fp);
00257 return 0;
00258
00259 error_out:
00260 fclose(fp);
00261 for (i = 0; i < *out_n_trees; ++i) {
00262 free_kd_tree((*out_trees)[i]);
00263 (*out_trees)[i] = NULL;
00264 }
00265 ckd_free(*out_trees);
00266 *out_trees = NULL;
00267 return -1;
00268 }
00269
00270 void
00271 free_kd_tree(kd_tree_t * tree)
00272 {
00273 uint32 i, n;
00274
00275 if (tree == NULL)
00276 return;
00277
00278 n = (1 << tree->n_level) - 1;
00279 for (i = 0; i < n; ++i)
00280 ckd_free(tree->nodes[i].bbi);
00281 ckd_free(tree->nodes);
00282 ckd_free(tree);
00283 }
00284
00285 kd_tree_node_t *
00286 eval_kd_tree(kd_tree_t * tree, mfcc_t * feat, uint32 maxdepth)
00287 {
00288 uint32 node;
00289
00290 node = 0;
00291 while (tree->nodes[node].left && --maxdepth > 0) {
00292 if (feat[tree->nodes[node].split_comp]
00293 < tree->nodes[node].split_plane)
00294 node = tree->nodes[node].left;
00295 else
00296 node = tree->nodes[node].right;
00297 }
00298 return tree->nodes + node;
00299 }