/* MSVC-specific pragma: silences compiler warning C4305 (truncation of a
 * constant to a narrower type).  NOTE(review): presumably needed because
 * double-valued defaults such as CONT_AD_ADAPT_RATE are stored into
 * float32 parameters -- confirm against the assignments. */
165 #pragma warning (disable: 4305)
/* Absolute value of x.  The argument is expanded twice, so it must not
 * have side effects. */
175 #define _ABS(x) ((x) >= 0 ? (x) : -(x))
/* Number of frame slots in the circular frame buffer: frame indices in
 * this file are wrapped by adding or subtracting this value, and the
 * per-frame array sized with sizeof(*r->frm_pow) is allocated with this
 * many entries. */
181 #define CONT_AD_ADFRMSIZE 256
/* Number of bins in the power histogram (pow_hist); the histogram is
 * allocated and scanned with this many entries. */
183 #define CONT_AD_POWHISTSIZE 98
/* Frame count used for calibration: twice the histogram size.  Multiplied
 * by the samples-per-frame value (r->spf) to obtain a sample count. */
186 #define CONT_AD_CALIB_FRAMES (CONT_AD_POWHISTSIZE * 2)
/* NOTE(review): presumably the number of frames between threshold
 * updates -- usage is not visible here, confirm. */
188 #define CONT_AD_THRESH_UPDATE 100
/* NOTE(review): presumably the default for the adapt_rate parameter,
 * which cont_ad_set_params requires to lie in the range 0..1 -- confirm. */
191 #define CONT_AD_ADAPT_RATE 0.2
/* Default sampling rate assigned to r->sps; also the divisor used when
 * deriving samples-per-frame as (r->sps * 256) / CONT_AD_SPS. */
193 #define CONT_AD_SPS 16000
/* NOTE(review): the five values below look like defaults for the
 * noise-level / threshold settings (the set_params arguments delta_sil,
 * delta_speech, min_noise and max_noise must all be >= 0) -- confirm
 * where each is assigned. */
195 #define CONT_AD_DEFAULT_NOISE 30
196 #define CONT_AD_DELTA_SIL 10
197 #define CONT_AD_DELTA_SPEECH 17
198 #define CONT_AD_MIN_NOISE 2
199 #define CONT_AD_MAX_NOISE 70
/* NOTE(review): usage not visible here; presumably controls how slowly
 * the power histogram reacts to new frames -- confirm. */
201 #define CONT_AD_HIST_INERTIA 3
/* NOTE(review): presumably the default window size (in frames) used for
 * boundary detection.  cont_ad_set_params requires speech_onset,
 * sil_onset and (leader + trailer) to each be <= winsize, and all of
 * them to be > 0; the defaults below satisfy those constraints. */
203 #define CONT_AD_WINSIZE 21
/* NOTE(review): presumably the default speech_onset value -- confirm. */
206 #define CONT_AD_SPEECH_ONSET 9
/* NOTE(review): presumably the default sil_onset value -- confirm. */
213 #define CONT_AD_SIL_ONSET 18
/* Default value assigned to r->leader during initialization. */
220 #define CONT_AD_LEADER 5
/* NOTE(review): presumably the default trailer value, the counterpart of
 * CONT_AD_LEADER -- confirm. */
224 #define CONT_AD_TRAILER 10
235 fprintf(fp,
"PowHist:\n");
236 for (i = 0, j = 0; i < CONT_AD_POWHISTSIZE; i++) {
238 fprintf(fp,
"\t%3d %6d\n", i, r->
pow_hist[i]);
243 fprintf(fp,
"PH[%7.2f]:",
245 for (i = 0; i <= j; i++)
246 fprintf(fp,
" %2d", r->
pow_hist[i]);
258 cont_ad_frame_pow(int16 * buf, int32 * prev, int32 spf)
266 for (i = 0; i < spf; i++) {
268 v = (double) (buf[i] - p);
298 i = (int32) ((10.0 * (log10(sumsq) - log10((
double) spf))) + 0.5);
312 compute_frame_pow(
cont_ad_t * r, int32 frm)
335 for (i = 0; i < CONT_AD_POWHISTSIZE; i++)
347 int32 old_noise_level, old_thresh_sil, old_thresh_speech;
358 (i < CONT_AD_POWHISTSIZE) && (r->
pow_hist[i] == 0); i++);
367 for (j = i, th = i; (j < CONT_AD_POWHISTSIZE) && (j < i + 20); j++) {
368 if (max < r->pow_hist[j]) {
389 "%7.2fs %8df: NoisePeak: %d, Noiselevel: %d -> %d, Th-Sil: %d -> %d, Th-Sp: %d -> %d\n",
392 old_thresh_sil, r->
thresh_sil, old_thresh_speech,
413 sil2speech_transition(
cont_ad_t *r,
int frm)
418 seg = malloc(
sizeof(*seg));
421 if (seg->startfrm < 0)
422 seg->startfrm += CONT_AD_ADFRMSIZE;
438 n = frm - seg->startfrm;
440 n += CONT_AD_ADFRMSIZE;
444 "%7.2fs %8d[%3d]f: Sil -> Sp detect; seg start: %7.2fs %8d\n",
449 (
double) (n * r->
spf) / (
double) (r->
sps), n);
464 speech2sil_transition(
cont_ad_t *r,
int frm)
478 if (n >= CONT_AD_ADFRMSIZE)
479 n -= CONT_AD_ADFRMSIZE;
482 n += CONT_AD_ADFRMSIZE;
486 "%7.2fs %8d[%3d]f: Sp -> Sil detect; seg end: %7.2fs %8d\n",
489 (
double) (n * r->
spf) / (
double) (r->
sps), n);
508 if (f >= CONT_AD_ADFRMSIZE)
521 boundary_detect(
cont_ad_t * r, int32 frm)
537 "%7.2fs %8d[%3d]f: P: %2d, N: %2d, T+: %2d, T-: %2d, #O: %2d, %s\n",
541 (r->
tail_state == CONT_AD_STATE_SIL) ?
"--" :
"Sp");
551 sil2speech_transition(r, frm);
556 speech2sil_transition(r, frm);
592 max_siglvl(
cont_ad_t * r, int32 startfrm, int32 nfrm)
598 for (i = 0, f = startfrm; i < nfrm; i++, f++) {
599 if (f >= CONT_AD_ADFRMSIZE)
600 f -= CONT_AD_ADFRMSIZE;
615 get_audio_data(
cont_ad_t * r, int16 * buf, int32 max)
622 cont_ad_read_log(
cont_ad_t * r, int32 retval)
626 fprintf(r->
logfp,
"return from cont_ad_read() -> %d:\n", retval);
628 fprintf(r->
logfp,
"\tread_ts: %d (%.2fs)\n",
630 fprintf(r->
logfp,
"\tseglen: %d (%.2fs)\n",
645 fprintf(r->
logfp,
"\tspseg:");
646 for (seg = r->
spseg_head; seg; seg = seg->next)
647 fprintf(r->
logfp,
" %d[%d]", seg->startfrm, seg->nfrm);
648 fprintf(r->
logfp,
"\n");
663 buf_copy(
cont_ad_t * r, int32 sf, int32 nf, int16 * buf)
667 assert((sf >= 0) && (sf < CONT_AD_ADFRMSIZE));
670 if (sf + nf > CONT_AD_ADFRMSIZE) {
672 f = CONT_AD_ADFRMSIZE - sf;
674 memcpy(buf, r->
adbuf + (sf * r->
spf), l *
sizeof(int16));
678 "return %d speech frames [%d..%d]; %d samples\n",
679 f, sf, sf + f - 1, l);
689 memcpy(buf, r->
adbuf + (sf * r->
spf), l *
sizeof(int16));
693 "return %d speech frames [%d..%d]; %d samples\n",
694 nf, sf, sf + nf - 1, l);
698 if ((sf + nf) >= CONT_AD_ADFRMSIZE) {
699 assert((sf + nf) == CONT_AD_ADFRMSIZE);
716 cont_ad_read_internal(
cont_ad_t *r, int16 *buf, int32 max)
718 int32 head, tail, len, l;
728 assert((len >= 0) && (len < r->spf));
730 if ((tail < r->adbufsize) && (!r->
eof)) {
733 (*(r->adfunc)) (r->
ad, r->
adbuf + tail,
748 memcpy(r->
adbuf + tail, buf, l *
sizeof(int16));
751 if ((l > 0) && r->
rawfp) {
752 fwrite(r->
adbuf + tail,
sizeof(int16), l, r->
rawfp);
765 (*(r->adfunc)) (r->
ad,
766 r->
adbuf + tail, head - tail)) < 0) {
775 memcpy(r->
adbuf + tail, buf, l *
sizeof(int16));
777 if ((l > 0) && r->
rawfp) {
778 fwrite(r->
adbuf + tail,
sizeof(int16), l, r->
rawfp);
795 cont_ad_classify(
cont_ad_t *r, int32 len)
800 if (tailfrm >= CONT_AD_ADFRMSIZE)
801 tailfrm -= CONT_AD_ADFRMSIZE;
803 for (; len >= r->
spf; len -= r->
spf) {
804 compute_frame_pow(r, tailfrm);
812 boundary_detect(r, tailfrm);
814 if (++tailfrm >= CONT_AD_ADFRMSIZE)
836 if (f >= CONT_AD_ADFRMSIZE)
846 if (f >= CONT_AD_ADFRMSIZE)
865 int32 flen, len, retval, newstate;
868 if ((r == NULL) || (buf == NULL))
873 (
"cont_ad_read requires buffer of at least %d samples\n",
879 fprintf(r->
logfp,
"cont_ad_read(,, %d)\n", max);
884 len = cont_ad_read_internal(r, buf, max);
887 cont_ad_classify(r, len);
923 if ((seg == NULL) || (r->
headfrm != seg->startfrm)) {
938 flen = seg->startfrm - r->
headfrm;
940 flen += CONT_AD_ADFRMSIZE;
945 int32 f = max / r->
spf;
950 newstate = CONT_AD_STATE_SIL;
954 if (flen > seg->nfrm)
957 newstate = CONT_AD_STATE_SPEECH;
964 if ((newstate == CONT_AD_STATE_SIL) && (!r->
rawmode)) {
967 if (r->
headfrm >= CONT_AD_ADFRMSIZE)
968 r->
headfrm -= CONT_AD_ADFRMSIZE;
984 if (r->
state == newstate)
990 if (newstate == CONT_AD_STATE_SPEECH) {
992 assert(seg->startfrm >= 0);
997 && (seg->next || (r->
tail_state == CONT_AD_STATE_SIL))) {
999 if (seg->next == NULL)
1012 cont_ad_read_log(r, retval);
1024 int32 i, s, k, len, tailfrm;
1030 for (i = 0; i < CONT_AD_POWHISTSIZE; i++)
1033 if (tailfrm >= CONT_AD_ADFRMSIZE)
1034 tailfrm -= CONT_AD_ADFRMSIZE;
1035 s = (tailfrm * r->
spf);
1043 if ((k = (*(r->adfunc)) (r->
ad, r->
adbuf + s, len)) < 0)
1050 compute_frame_pow(r, tailfrm);
1054 return find_thresh(r);
1060 return r->
spf * CONT_AD_CALIB_FRAMES;
1066 int32 i, s, len, tailfrm;
1073 for (i = 0; i < CONT_AD_POWHISTSIZE; i++)
1078 if (tailfrm >= CONT_AD_ADFRMSIZE)
1079 tailfrm -= CONT_AD_ADFRMSIZE;
1080 s = (tailfrm * r->
spf);
1087 memcpy(r->
adbuf + s, buf, len *
sizeof(int16));
1090 compute_frame_pow(r, tailfrm);
1094 return find_thresh(r);
1105 if ((sil < 0) || (speech < 0)) {
1107 "cont_ad_set_thresh: invalid threshold arguments: %d, %d\n",
1127 int32 delta_speech, int32 min_noise,
1128 int32 max_noise, int32 winsize,
1129 int32 speech_onset, int32 sil_onset, int32 leader,
1130 int32 trailer, float32 adapt_rate)
1132 if ((delta_sil < 0) || (delta_speech < 0) || (min_noise < 0)
1133 || (max_noise < 0)) {
1134 E_ERROR(
"threshold arguments: "
1135 "%d, %d, %d, %d must all be >=0\n", delta_sil,
1136 delta_speech, min_noise, max_noise);
1140 if ((speech_onset > winsize) || (speech_onset <= 0)
1141 || (winsize <= 0)) {
1143 (
"speech_onset, %d, must be <= winsize, %d, and both >0\n",
1144 speech_onset, winsize);
1148 if ((sil_onset > winsize) || (sil_onset <= 0) || (winsize <= 0)) {
1150 (
"sil_onset, %d, must be <= winsize, %d, and both >0\n",
1151 sil_onset, winsize);
1155 if (((leader + trailer) > winsize) || (leader <= 0)
1156 || (trailer <= 0)) {
1158 (
"leader, %d, plus trailer, %d, must be <= winsize, %d, and both >0\n",
1159 leader, trailer, winsize);
1163 if ((adapt_rate < 0.0) || (adapt_rate > 1.0)) {
1164 E_ERROR(
"adapt_rate, %e; must be in range 0..1\n", adapt_rate);
1200 int32 * delta_speech, int32 * min_noise,
1201 int32 * max_noise, int32 * winsize,
1202 int32 * speech_onset, int32 * sil_onset,
1203 int32 * leader, int32 * trailer, float32 * adapt_rate)
1205 if (!delta_sil || !delta_speech || !min_noise || !max_noise
1206 || !winsize || !speech_onset || !sil_onset || !leader
1207 || !trailer || !adapt_rate) {
1208 fprintf(stderr,
"cont_ad_get_params: some param slots are NULL\n");
1294 int32(*func) (
ad_rec_t *, int16 *, int32))
1323 if (f >= CONT_AD_ADFRMSIZE)
1327 else if (r->
tail_state == CONT_AD_STATE_SPEECH) {
1333 if (f >= CONT_AD_ADFRMSIZE)
1378 if ((r = malloc(
sizeof(*r))) == NULL) {
1391 r->
sps = CONT_AD_SPS;
1394 r->
spf = (r->
sps * 256) / CONT_AD_SPS;
1403 calloc(CONT_AD_POWHISTSIZE,
sizeof(*r->
pow_hist))) == NULL) {
1410 calloc(CONT_AD_ADFRMSIZE,
sizeof(*r->
frm_pow))) == NULL) {
1418 r->
state = CONT_AD_STATE_SIL;
1434 r->
leader = CONT_AD_LEADER;
1460 int32(*func) (
ad_rec_t *, int16 *, int32))