ubuntuusers.de

ogi axk diphone.scm

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<--OHSU-->;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                        ;;
;;                   Oregon Health & Science University                   ;;
;;                Division of Biomedical Computer Science                 ;;
;;                Center for Spoken Language Understanding                ;;
;;                         Portland, OR USA                               ;;
;;                        Copyright (c) 2000                              ;;
;;                                                                        ;;
;;      This module is not part of the CSTR/University of Edinburgh       ;;
;;               release of the Festival TTS system.                      ;;
;;                                                                        ;;
;;  In addition to any conditions disclaimers below, please see the file  ;;
;;  "NE Copyright Materials License.txt" distributed with this software   ;;
;;  for information on usage and redistribution, and for a DISCLAIMER OF  ;;
;;  ALL WARRANTIES.                                                       ;;
;;                                                                        ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<--OHSU-->;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                        ;;
;;       A German Voice for the Festival Speech Synthesis System          ;;
;;        Authors: Karin Mueller, Bettina Saeuberlich, Horst Meyer,       ;;
;;                 Mike Macon and Alan W Black                            ;;
;;          Date:  OGI, June/July 1998                                    ;;
;;                                                                        ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Base directory for this voice's data.  The value is the cdr of the
;; ogi_axk_diphone entry in the voice-locations association list.
;; init_ogi_axk_diphone builds its "festvox" and "group" file paths from it.
(defvar ogi_axk_diphone_dir (cdr (assoc 'ogi_axk_diphone voice-locations))
  "ogi_axk_diphone_dir
  The default directory for the axk diphone database.")


;; for german text pre-processing
;; Each set! conses one more directory (built relative to libdir) onto the
;; front of load-path, so the directory added last ("german") ends up first
;; in the list.
(set! load-path (cons (path-append libdir "ogi") load-path))
(set! load-path (cons (path-append libdir "ogi/ogi_german/") load-path))
(set! load-path (cons (path-append libdir "german") load-path))

;(require 'ims_german_preproc)
(require 'ims_german_phones)
(require 'ogi_german_map)
(require 'ims_german_POS)
(require 'ims_german_lexicons)
(require 'ims_german_token)
(require 'ims_german_duration)
(require 'ims_german_postlex)  
(require 'ogi_unitsel)
(require 'ogi_synthesis)
(require 'tobi)
(require 'f2bf0lr) 
;;; Use the grouped file by default
;;; init_ogi_axk_diphone compares this variable against the symbol
;;; ungrouped: that value selects the per-file database setup, any other
;;; value selects the single group file.
(defvar axkdi_grouped_or_ungrouped 'grouped)

;;;  Full voice definition
;;;
;;;  voice_ogi_axk_diphone takes no arguments.  It calls voice_reset and then
;;;  configures the synthesis pipeline stage by stage through Parameter.set /
;;;  Param.set calls and assignments to global variables.  It finishes by
;;;  activating the "ogi_axk_diphone" database, initialising OGIresLPC with
;;;  axkdi_OGI_syn_params and recording the voice in current-voice.
;;;  Statement order matters: several globals are assigned more than once
;;;  and the last assignment wins.
(define (voice_ogi_axk_diphone)
"(voice_ogi_axk_diphone)
Set up synthesis for male German speaker: using OGIresLPC synth"

  (voice_reset)

  (Parameter.set 'Language 'german)
  ;; Phone set
  (Parameter.set 'PhoneSet 'german_sampa)
  (PhoneSet.select 'german_sampa)

  ;;Utterance-chunking
  ;; (ims_german_token is also required at file load time; this repeats it.)
  (require 'ims_german_token)
  (set! eou_tree german_eou_tree) 

  ;; Tokenization
  (Param.set 'Token_Method 'Token_German)
  (set! token.punctuation  "\"'`.,:;!?(){}[]-")
  ;; NOTE(review): the last character of the prepunctuation string below is
  ;; non-ASCII and may be an encoding artifact -- verify against upstream.
  (set! token.prepunctuation "\"'`({[ž")

  ;;Token_POS-prediction
  (require 'ims_german_tokenpos)
  (set! token_pos_cart_trees german_token_pos_cart_trees) 

  ;; no POS tagger for basic voice
  (Param.set 'POS_Method 'No_POS)
  (Param.set 'pos_tagset 'ustts) ; main categories of the STTS tagset

  ;; Phrase break prediction by punctuation
  (require 'ims_german_phrasify)
  (Param.set 'Phrasify_Method 'Phrasify_German)
  (set! phrase_cart_tree german_phrase_cart_tree) 
  (Param.set 'Phrase_Method 'cart_tree)

  ;; Phrasing
  ;; NOTE(review): this overwrites the phrase_cart_tree value assigned just
  ;; above (german_phrase_cart_tree) with simple_phrase_cart_tree, and sets
  ;; Phrase_Method to cart_tree a second time.  Confirm which tree is meant
  ;; to be in effect when the function returns.
  (set! phrase_cart_tree simple_phrase_cart_tree)
  (Parameter.set 'Phrase_Method 'cart_tree)

  ;; Lexicon selection
  (setup_bomp_lex)
  (lex.select "bomp")
  ;; (ims_german_postlex is also required at file load time.)
  (require 'ims_german_postlex)
  (set! postlex_rules_hooks german_postlex)

  ;; Accent and tone prediction
  ;; The f2b_* trees and models are reused here for the German voice.
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)
  (set! f0_lr_start f2b_f0_lr_start)
  (set! f0_lr_mid f2b_f0_lr_mid)
  (set! f0_lr_end f2b_f0_lr_end)
  ;; Intonation_Tree is passed unquoted, i.e. as the variable's value rather
  ;; than as a symbol (same for Int_Targets_LR below).
  (Parameter.set 'Int_Method Intonation_Tree)
  ;; Separate mean/std values for the target voice and for the model.
  (set! int_lr_params
	'((target_f0_mean 120) (target_f0_std 16)
	  (model_f0_mean 170) (model_f0_std 34)))
  (Parameter.set 'Int_Target_Method Int_Targets_LR)

  ;; Duration prediction
  (Param.set 'Duration_Method 'Tree_ZScores)
  (set! duration_cart_tree german_duration_cart_tree)
  (set! duration_ph_info german_duration_ph_info_sampa)  
  (Param.set 'Duration_Stretch 1.1)

  ;; diphone unit selection fallbacks
  ;; The L and R tables are identical.  Each entry pairs a phone with a list
  ;; of substitute phones (presumably tried when a diphone is missing --
  ;; confirm against the OGI unit selection code).
  (set! ogi_di_alt_L '((x (C)) (C (x)) (b (p)) (g (k)) (d (t)) (v (f)) (? (_)) (h (_))))
  (set! ogi_di_alt_R '((x (C)) (C (x)) (b (p)) (g (k)) (d (t)) (v (f)) (? (_)) (h (_))))
  (set! ogi_di_default "pau-h")

  ;; Postlexical rules
  ;; (disabled alternative; postlex_rules_hooks is set to german_postlex above)
;  (set! postlex_rules_hooks 
;	(list Stop_before_Vowel Stop_after_Prefix))

  ;; Waveform synthesizer: diphones
  (Parameter.set 'Synth_Method 'OGIdiphone)
  (OGIdbase.activate "ogi_axk_diphone")      
  (OGIresLPC.init axkdi_OGI_syn_params)  
  (set! current-voice 'ogi_axk_diphone)  
)

;; Spectral join smoothing table: for each phone class, the maximum number
;; of frames to use when smoothing LPC at a join.
(set! axkdi_smooth_spectra
      (list 'spectra_smooth
            (list '(vc + 5)       ; vowels
                  '(ctype s 0)    ; stops
                  '(ctype f 2)    ; fricatives
                  '(ctype a 0)    ; affricates
                  '(ctype n 4)    ; nasals
                  '(ctype l 4)    ; laterals
                  '(ctype r 4)))) ; approximants
 
;; Power join smoothing table: for each phone class, the maximum number of
;; frames to use when smoothing power at a join.
(set! axkdi_smooth_power
      (list 'power_smooth
            (list '(vc + 10)      ; vowels
                  '(ctype s 0)    ; stops
                  '(ctype f 2)    ; fricatives
                  '(ctype a 0)    ; affricates
                  '(ctype n 5)    ; nasals
                  '(ctype l 5)    ; laterals
                  '(ctype r 5)))) ; approximants
 
;;
;;  Parameter list for the residual LPC synthesizer; handed to
;;  OGIresLPC.init by voice_ogi_axk_diphone.  The fixed settings are kept in
;;  one quoted list and the two join-smoothing tables are appended as the
;;  final two elements.
;;
(set! axkdi_OGI_syn_params
      (append
       '((F0_default 50.0)        ; Fo used when no Fo targets can be found
         (T0_UV_thresh 0.020)     ; used for the voiced/unvoiced decision
         (T0_UV_pm 0.010)         ; used to place unvoiced pitchmarks
         (post_gain 1.0)          ; final loudness adjustment
         (deemphasis 0.94)        ; inverse of preemphasis (lowpass filter)
         (mod_method "direct")    ; how prosodic targets are realized
                                  ; (alternative kept disabled: "none")
         (beta_smooth 5)          ; smoother length for the pitch mod factor
                                  ; in the "soft" mod method
         (smooth_cross_ph_join "Y")  ; smooth across joins at phone boundaries?
         (window_type "trapezoid")
         (spectra_match_or_replace "match")  ; join smoothing
         (power_match_or_replace   "match")) ; join smoothing
       (list axkdi_smooth_spectra
             axkdi_smooth_power)))
 
 
;; Register the AXK diphone database through OGIdbase.init.
;;
;; Takes no arguments.  Sets the global voicename to the symbol
;; ogi_axk_diphone, then initialises the database in one of two ways
;; depending on axkdi_grouped_or_ungrouped:
;;   ungrouped   -- individual unit dictionary / LPC / residual / pitchmark
;;                  files described by the full parameter list below;
;;   otherwise   -- a single group file under ogi_axk_diphone_dir/group.
;; Returns t.
(define (init_ogi_axk_diphone)
"(init_ogi_axk_diphone)
  Initialise the AXK diphone database.  This sets up the 16K version
  for residual excited LPC."
  (set! voicename 'ogi_axk_diphone)

  (if (equal? axkdi_grouped_or_ungrouped 'ungrouped)

      ;;  ungrouped
      (OGIdbase.init
       (list
        '(dbname ogi_axk_diphone)
        (list 'unitdic_file (path-append ogi_axk_diphone_dir "festvox" "axkdiph.msec"))
;        (list 'gain_file (path-append ogi_axk_diphone_dir "festvox" "axkgain.dat"))
        '(phoneset "sampa")
        ;; NOTE(review): base_dir is an absolute path that is not derived
        ;; from ogi_axk_diphone_dir, so ungrouped mode only works where this
        ;; directory exists.
        '(base_dir "/u/macon/TTS/tts_data/axk/")
        '(lpc_dir "lpc16/")
        '(lpc_ext ".lpc")
        '(exc_dir "lpc16/")
        '(exc_ext ".res")
        '(pm_dir "pm/")
        '(pm_ext ".pmv")
        '(data_type "resLPC")
        '(access_mode "ondemand")
        '(samp_freq 16000)
        '(sig_band 0.010)
        '(isCompressed "Y") ;; if "Y", compress when saving group file
        '(preemph 0.96)
        ))

      ;;  grouped -- parameters set here override those in groupfile
      (OGIdbase.init
       (list
        '(dbname ogi_axk_diphone)
        ;; voicename is a symbol at this point and is passed straight to
        ;; string-append to form "<voicename>_reslpc.group".
        (list 'groupfile (path-append ogi_axk_diphone_dir "group" (string-append voicename "_reslpc.group")))
        '(data_type "resLPC")
        '(access_mode "ondemand")
        )))

  ;; The trailing t used to sit inside the if form as an extra operand
  ;; after the else branch, so it was never the function's value.  It is
  ;; now the last body expression and therefore the return value.
  t)


;; Initialise the diphone database once, while this file is being loaded.
(init_ogi_axk_diphone)
 
 
;; Announce the voice under the name ogi_axk_diphone together with its
;; descriptive attributes (language, gender, dialect, description).
(proclaim_voice
 'ogi_axk_diphone
 '((language german)
   (gender male)
   (dialect berlin+usa)
   (description
    "  This voice provides a German male using a
     residual excited LPC diphone synthesis module created at
     OGI, summer 1998.  
       Diese Festival Version wurde fuer das Deutsche, an dem Oregon
     Graduierten Institut von Karin Mueller, Bettina Saeuberlich, Horst
     Meyer, Alan Black, und Mike Macon entwickelt.")))

;; Counterpart of (require 'ogi_axk_diphone): marks this module as provided.
(provide 'ogi_axk_diphone)



Anhang herunterladen

Diese Revision wurde am 24. November 2021 15:40 von ubuntuusers erstellt.