;; Attachment: ogi_axk_diphone.scm

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<--OHSU-->;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                        ;;
;;                   Oregon Health & Science University                   ;;
;;                Division of Biomedical Computer Science                 ;;
;;                Center for Spoken Language Understanding                ;;
;;                         Portland, OR USA                               ;;
;;                        Copyright (c) 2000                              ;;
;;                                                                        ;;
;;      This module is not part of the CSTR/University of Edinburgh       ;;
;;               release of the Festival TTS system.                      ;;
;;                                                                        ;;
;;  In addition to any conditions disclaimers below, please see the file  ;;
;;  "NE Copyright Materials License.txt" distributed with this software   ;;
;;  for information on usage and redistribution, and for a DISCLAIMER OF  ;;
;;  ALL WARRANTIES.                                                       ;;
;;                                                                        ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<--OHSU-->;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                        ;;
;;       A German Voice for the Festival Speech Synthesis System          ;;
;;        Authors: Karin Mueller, Bettina Saeuberlich, Horst Meyer,       ;;
;;                 Mike Macon and Alan W Black                            ;;
;;          Date:  OGI, June/July 1998                                    ;;
;;                                                                        ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Directory of this voice, taken from the entry keyed by ogi_axk_diphone
;; in voice-locations.
(defvar ogi_axk_diphone_dir
  (let ((location (assoc 'ogi_axk_diphone voice-locations)))
    (cdr location))
  "ogi_axk_diphone_dir
  The default directory for the axk diphone database.")


;; Extra module directories needed for German text pre-processing.
;; They are placed in front of the existing load-path; the resulting
;; search order is "german", "ogi/ogi_german/", "ogi", then the old path.
(set! load-path
      (append
       (list (path-append libdir "german")
             (path-append libdir "ogi/ogi_german/")
             (path-append libdir "ogi"))
       load-path))

;(require 'ims_german_preproc)
(require 'ims_german_phones)
(require 'ogi_german_map)
(require 'ims_german_POS)
(require 'ims_german_lexicons)
(require 'ims_german_token)
(require 'ims_german_duration)
(require 'ims_german_postlex)  
(require 'ogi_unitsel)
(require 'ogi_synthesis)
(require 'tobi)
(require 'f2bf0lr) 
;;; Use the grouped file by default.
;;; init_ogi_axk_diphone compares this against 'ungrouped: that value
;;; selects the per-unit database files, any other value the group file.
(defvar axkdi_grouped_or_ungrouped 'grouped)

;;;  Full voice definition 
(define (voice_ogi_axk_diphone)
"(voice_ogi_axk_diphone)
Set up synthesis for male German speaker: using OGIresLPC synth"

  ;; Everything below works by assigning global variables and
  ;; parameters, so the order of the statements matters.
  (voice_reset)

  (Parameter.set 'Language 'german)
  ;; Phone set
  (Parameter.set 'PhoneSet 'german_sampa)
  (PhoneSet.select 'german_sampa)

  ;; Utterance chunking
  ;; (ims_german_token is also required at the top of this file)
  (require 'ims_german_token)
  (set! eou_tree german_eou_tree) 

  ;; Tokenization
  (Param.set 'Token_Method 'Token_German)
  (set! token.punctuation  "\"'`.,:;!?(){}[]-")
  ;; NOTE(review): the last character of the prepunctuation string looks
  ;; like a character-encoding artifact -- confirm against the original.
  (set! token.prepunctuation "\"'`({[ž")

  ;; Token POS prediction
  (require 'ims_german_tokenpos)
  (set! token_pos_cart_trees german_token_pos_cart_trees) 

  ;; no POS tagger for basic voice
  (Param.set 'POS_Method 'No_POS)
  (Param.set 'pos_tagset 'ustts) ; main categories of the STTS tagset

  ;; Phrase break prediction by punctuation
  (require 'ims_german_phrasify)
  (Param.set 'Phrasify_Method 'Phrasify_German)
  (set! phrase_cart_tree german_phrase_cart_tree) 
  (Param.set 'Phrase_Method 'cart_tree)

  ;; Phrasing
  ;; NOTE(review): this assignment replaces the german_phrase_cart_tree
  ;; stored in phrase_cart_tree a few lines above, so
  ;; simple_phrase_cart_tree is the value left in effect.  Confirm which
  ;; of the two trees is intended.
  (set! phrase_cart_tree simple_phrase_cart_tree)
  (Parameter.set 'Phrase_Method 'cart_tree)

  ;; Lexicon selection
  (setup_bomp_lex)
  (lex.select "bomp")
  ;; (ims_german_postlex is also required at the top of this file)
  (require 'ims_german_postlex)
  (set! postlex_rules_hooks german_postlex)

  ;; Accent and tone prediction: the f2b_* values are assigned to the
  ;; generic intonation variables.
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)
  (set! f0_lr_start f2b_f0_lr_start)
  (set! f0_lr_mid f2b_f0_lr_mid)
  (set! f0_lr_end f2b_f0_lr_end)
  ;; Intonation_Tree is passed unquoted, i.e. as the value bound to that
  ;; name rather than as a symbol.
  (Parameter.set 'Int_Method Intonation_Tree)
  ;; Mean / standard deviation pairs for the target and the model F0.
  ;; NOTE(review): presumably in Hz -- confirm.
  (set! int_lr_params
	'((target_f0_mean 120) (target_f0_std 16)
	  (model_f0_mean 170) (model_f0_std 34)))
  ;; Int_Targets_LR is likewise passed unquoted.
  (Parameter.set 'Int_Target_Method Int_Targets_LR)

  ;; Duration prediction
  (Param.set 'Duration_Method 'Tree_ZScores)
  (set! duration_cart_tree german_duration_cart_tree)
  (set! duration_ph_info german_duration_ph_info_sampa)  
  (Param.set 'Duration_Stretch 1.1)

  ;; diphone unit selection fallbacks
  ;; Each entry pairs a phone with a list of alternative phones; the
  ;; left (L) and right (R) tables hold the same entries here.
  (set! ogi_di_alt_L '((x (C)) (C (x)) (b (p)) (g (k)) (d (t)) (v (f)) (? (_)) (h (_))))
  (set! ogi_di_alt_R '((x (C)) (C (x)) (b (p)) (g (k)) (d (t)) (v (f)) (? (_)) (h (_))))
  (set! ogi_di_default "pau-h")

  ;; Postlexical rules (disabled; postlex_rules_hooks is assigned in the
  ;; lexicon section above)
;  (set! postlex_rules_hooks 
;	(list Stop_before_Vowel Stop_after_Prefix))

  ;; Waveform synthesizer: diphones
  (Parameter.set 'Synth_Method 'OGIdiphone)
  (OGIdbase.activate "ogi_axk_diphone")      
  (OGIresLPC.init axkdi_OGI_syn_params)  
  (set! current-voice 'ogi_axk_diphone)  
)

;;; Upper bound on the number of frames used when smoothing the LPC
;;; spectra at a join, given per phone class.
(set! axkdi_smooth_spectra
      (list 'spectra_smooth
            (list '(vc + 5)       ; vowel
                  '(ctype s 0)    ; stop
                  '(ctype f 2)    ; fricative
                  '(ctype a 0)    ; affricate
                  '(ctype n 4)    ; nasal
                  '(ctype l 4)    ; lateral
                  '(ctype r 4)))) ; approximant
 
;;; Upper bound on the number of frames used when smoothing the power
;;; contour at a join, given per phone class.
(set! axkdi_smooth_power
      (list 'power_smooth
            (list '(vc + 10)      ; vowel
                  '(ctype s 0)    ; stop
                  '(ctype f 2)    ; fricative
                  '(ctype a 0)    ; affricate
                  '(ctype n 5)    ; nasal
                  '(ctype l 5)    ; lateral
                  '(ctype r 5)))) ; approximant
 
;;
;;  Parameter list handed to OGIresLPC.init by voice_ogi_axk_diphone
;;  (residual LPC synthesizer settings).  The constant entries come
;;  first; the two join-smoothing tables defined above are appended
;;  after them.
;;
(set! axkdi_OGI_syn_params
      (append
       '((F0_default 50.0)          ;; Fo to fall back on when no Fo targets are found
         (T0_UV_thresh 0.020)       ;; threshold for the voiced/unvoiced decision
         (T0_UV_pm 0.010)           ;; spacing used when placing unvoiced pitchmarks
         (post_gain 1.0)            ;; final loudness adjustment
         (deemphasis 0.94)          ;; inverse of preemphasis (lowpass filter)
         (mod_method "direct")      ;; how prosodic targets are realized
         ;; (mod_method "none")     ;; alternative setting, disabled
         (beta_smooth 5)            ;; smoother length for the pitch mod factor ("soft" mod method)
         (smooth_cross_ph_join "Y") ;; smooth across joins at phone boundaries?
         (window_type "trapezoid")
         (spectra_match_or_replace "match")  ;; join smoothing
         (power_match_or_replace   "match")) ;; join smoothing
       (list axkdi_smooth_spectra
             axkdi_smooth_power)))
 
 
(define (init_ogi_axk_diphone)
"(init_ogi_axk_diphone)
  Initialise the AXK diphone database.  This sets up the 16K version
  for residual excited LPC.  When axkdi_grouped_or_ungrouped is
  'ungrouped the database is described by individual unit files,
  otherwise by the group file under ogi_axk_diphone_dir.  Returns the
  value of the OGIdbase.init call."
  ;; voicename is a global; it is also used below to build the name of
  ;; the group file.
  (set! voicename 'ogi_axk_diphone)

  ;; The original form carried a stray fourth operand `t' inside this
  ;; `if'.  It was never evaluated, so dropping it leaves the return
  ;; value (the result of OGIdbase.init) as it was.
  (if (equal? axkdi_grouped_or_ungrouped 'ungrouped)

      ;;  ungrouped
      (OGIdbase.init
       (list
        '(dbname ogi_axk_diphone)
        (list 'unitdic_file (path-append ogi_axk_diphone_dir "festvox" "axkdiph.msec"))
;        (list 'gain_file (path-append ogi_axk_diphone_dir "festvox" "axkgain.dat"))
        '(phoneset "sampa")
        ;; NOTE(review): base_dir is an absolute path that is not derived
        ;; from ogi_axk_diphone_dir -- confirm it before enabling
        ;; ungrouped mode on another machine.
        '(base_dir "/u/macon/TTS/tts_data/axk/")
        '(lpc_dir "lpc16/")
        '(lpc_ext ".lpc")
        '(exc_dir "lpc16/")
        '(exc_ext ".res")
        '(pm_dir "pm/")
        '(pm_ext ".pmv")
        '(data_type "resLPC")
        '(access_mode "ondemand")
        '(samp_freq 16000)
        '(sig_band 0.010)
        '(isCompressed "Y") ;; if "Y", compress when saving group file
        '(preemph 0.96)
        ))

      ;;  grouped -- parameters set here override those in groupfile
      (OGIdbase.init
       (list
        '(dbname ogi_axk_diphone)
        (list 'groupfile
              (path-append ogi_axk_diphone_dir "group"
                           (string-append voicename "_reslpc.group")))
        '(data_type "resLPC")
        '(access_mode "ondemand")
        ))))


;; Run the database initialisation once, at the time this file is loaded.
(init_ogi_axk_diphone)
 
 
;; Announce the voice under the name ogi_axk_diphone together with its
;; descriptive attributes (language, gender, dialect, free-text
;; description).  The German paragraph of the description credits the
;; authors of this German Festival version.
(proclaim_voice
 'ogi_axk_diphone
 '((language german)
   (gender male)
   (dialect berlin+usa)
   (description
    "  This voice provides a German male using a
     residual excited LPC diphone synthesis module created at
     OGI, summer 1998.  
       Diese Festival Version wurde fuer das Deutsche, an dem Oregon
     Graduierten Institut von Karin Mueller, Bettina Saeuberlich, Horst
     Meyer, Alan Black, und Mike Macon entwickelt.")))

;; Record this module as loaded under the name ogi_axk_diphone, the name
;; a matching (require 'ogi_axk_diphone) looks for.
(provide 'ogi_axk_diphone)
;; Download attachment
;; This revision was created on 24 November 2021 at 15:40 by ubuntuusers.