ubuntuusers.de

Anhang: pdf2mp3.py

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/env python
# -*- coding: utf8 -*-

# ###################################################
# pdf2mp3.py - little script/program to convert a
# pdf file or ASCII file (.dat, .txt) into an mp3 or wav audio file
#
# Copyright (C) 2010 Hannes Rennau
# hannes@bolding-burchard.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
# ###################################################

# LIST OF PACKAGES NEEDED:
# you need to install the following packages:
# sudo apt-get install python poppler-utils festival festvox-rablpc16k
# lame espeak 

# HOW TO USE:
# 1.create a file with the name pdf2mp3 and copy the content of
#   the whole text in there
# 2.make the file an executable via:
#   >>> chmod +x pdf2mp3
# 3.copy file to /usr/bin to make usage of program possible from everywhere
# on your computer:  >>> sudo cp pdf2mp3 /usr/bin/
# 4.after that get help calling: 
#   pdf2mp3 -h
#  
# 5.EXAMPLE:
#   you want to convert yourfilename.pdf into a mp3 file, then just type:
#   pdf2mp3 -v en -f yourfilename.pdf -o yourfilename.mp3   
#   (for the english voice 'en', for german voice 'de',
#   type: espeak --voices to get list of voices available on your system)
#
#
# edited by busfahrer, November 2010

import os,sys
import string
import re
from subprocess import call
from optparse import OptionParser as op

def main():
    """Parse the command line and run the conversion.

    The options -v (voice), -f (input file) and -o (output file) are
    required; -s (speed) and -g (word gap) are optional and handed to
    convert() unchanged (None when not given).

    Returns 2 if a required option is missing or convert() reports an
    error message, otherwise None (which sys.exit() treats as success).
    """
    # NOTE(review): the usage text advertises "--ascii", but no such option
    # is registered below.
    parser = op(usage='%prog -s integer[optional] -g integer[optional]         \
                       -v [de,en,...] -f filename[.pdf|.txt|.dat]              \
                       -o filename[.wav|.mp3] [optional: --ascii]',
    description='This script convertes ASCII files (basically those files with \
                 extension .txt or .dat) or pdf files into an mp3 (or wav) \
                 audio file.',version=r'$v0.5$')

    parser.add_option('-s','--speed', type='string', metavar='INTEGER',
                      help='Speed in words per minute, 80 to 390,              \
                      default is 170.')

    parser.add_option('-g','--gap', type='string', metavar='INTEGER',
                      help='Word gap.Pause between words,units of 10mS at the  \
                      default speed.')

    parser.add_option('-v','--voice', type='string',metavar='VOICENAME',
                      help='name of the voice to be used.\
                      type: ***espeak --voices*** to get list of available \
                      voices on your system.')
    parser.add_option('-f','--file',  type='string',metavar='SOURCEFILENAME',
                      help='input path of file to read (and late on convert to \
                      audio file).This can be a pdf or                         \
                      ascii (.txt or .dat) file. extension must be given!')
    parser.add_option('-o','--output', type='string',metavar='OUTPUTFILENAME',
                      help='Output filename (with extension .wav or .mp3 that \
                      the program knows which audio format you want.)')

    options, args = parser.parse_args()

    if options.voice is None:
        print('no voice name given, use -v voicename \
              [type ***espeak --voices*** for list of available voices]')
        return 2

    # Without these checks a missing -f or -o reaches convert() as None and
    # crashes inside os.path instead of producing a readable message.
    if options.file is None:
        print('no input file given, use -f filename[.pdf|.txt|.dat]')
        return 2
    if options.output is None:
        print('no output file given, use -o filename[.wav|.mp3]')
        return 2

    result = convert(options.file, options.output, options.voice,
                     options.speed, options.gap)
    if result is not None:
        # convert() returns a message only when something went wrong.
        print(result)
        return 2

def convert(filename_inp, filename_out, language, speed, gapless):
    """Convert a pdf or plain-text file into a wav or mp3 audio file.

    filename_inp -- existing input file ending in .pdf, .txt or .dat
    filename_out -- target file name ending in .wav or .mp3
    language     -- espeak voice name (passed to ``espeak -v``)
    speed        -- value for ``espeak -s``, or None to omit the option
    gapless      -- value for ``espeak -g``, or None to omit the option

    Returns None on success, otherwise a message describing the problem.
    The input file is never modified or deleted: text extracted from a pdf
    goes to a temporary file that is removed again before returning.
    """
    import tempfile

    if not os.path.isfile(filename_inp):
        return '*** input file %s does not exist ***' % filename_inp

    extension_out = os.path.splitext(filename_out)[1]
    if extension_out not in ('.wav', '.mp3'):
        return 'please decide whether you want wav or mp3 format by \
               typing -o filename.wav or -o filename.mp3'

    extension = os.path.splitext(filename_inp)[1]
    if extension not in ('.dat', '.txt', '.pdf'):
        return '*** input file does not have extension (.txt, .dat, .pdf) ***'

    temp_text_file = None
    try:
        if extension == '.pdf':
            print('converting pdf file: %s to ASCII\n' % filename_inp)
            # Extract into a private temporary file so that an existing
            # .txt file next to the pdf is neither overwritten nor deleted.
            handle, temp_text_file = tempfile.mkstemp(suffix='.txt')
            os.close(handle)
            if pdf_convert_to_ascii(filename_inp, temp_text_file) != 0:
                return '*** pdftotext could not convert %s ***' % filename_inp
            filename_inp = temp_text_file

        if convert_to_wav(filename_inp, filename_out, language, speed,
                          gapless) != 0:
            return '*** espeak could not create the wav file ***'
        if extension_out == '.mp3':
            convert_wav_2_mp3(filename_out)
    except OSError as err:
        # Raised e.g. when one of the external programs is not installed.
        return '*** conversion failed: %s ***' % err
    finally:
        if temp_text_file is not None and os.path.exists(temp_text_file):
            os.remove(temp_text_file)

def pdf_convert_to_ascii(input_pdf_file, output_txt_file=None):
    """Extract the text of a pdf file with pdftotext.

    input_pdf_file  -- path of the pdf file
    output_txt_file -- where to write the text; by default the input path
                       with its last four characters replaced by '.txt'

    Returns the exit status of pdftotext.
    """
    if output_txt_file is None:
        output_txt_file = input_pdf_file[:-4] + '.txt'
    return call(['pdftotext', input_pdf_file, output_txt_file])

def convert_to_wav(filename_inp, filename_out, language, speed, gapless):
    """Speak the text in filename_inp into a wav file using espeak.

    The wav file is named after filename_out with its last four characters
    (the '.wav' or '.mp3' extension that convert() checks for) replaced by
    '.wav'.  speed and gapless are passed as ``-s`` and ``-g`` unless they
    are None.  The input file is left untouched.

    Returns the exit status of espeak.
    """
    import tempfile

    wav_file = filename_out[:-4] + '.wav'
    print('converting %s to %s\n' % (filename_inp, wav_file))
    with open(filename_inp, 'r') as infile:
        text = infile.read()

    # Only ASCII letters, blanks, newlines and . , ! ? are kept; everything
    # else (digits, non-ASCII letters, other punctuation) is dropped.
    edited_text = re.sub('[^a-zA-Z .,!?\n]', '', text)

    # A private temporary file avoids clobbering a user file that happens
    # to be called 'edited_text.txt' in the current directory.
    handle, edited_file = tempfile.mkstemp(suffix='.txt')
    try:
        with os.fdopen(handle, 'w') as f:
            f.write(edited_text)
        cmd = ['espeak']
        if gapless is not None:
            cmd += ['-g', str(gapless)]
        if speed is not None:
            cmd += ['-s', str(speed)]
        cmd += ['-v', language, '-f', edited_file, '-w', wav_file]
        return call(cmd)
    finally:
        # Clean up even when espeak is missing or fails.
        os.remove(edited_file)

def convert_wav_2_mp3(filename_out):
    """Encode the intermediate wav file as mp3 using lame.

    Both file names are derived from filename_out by replacing its last
    four characters with '.wav' and '.mp3' respectively.  The wav file is
    deleted only after lame succeeded; on failure it is kept and a message
    is printed.

    Returns the exit status of lame.
    """
    stem = filename_out[:-4]
    wav_file = stem + '.wav'
    mp3_file = stem + '.mp3'
    print('converting %s to %s\n' % (wav_file, mp3_file))
    retcode = call(['lame', '-f', wav_file, mp3_file])
    if retcode == 0:
        print('Datei %s wurde erfolgreich erstellt' % filename_out)
        os.remove(wav_file)
    else:
        # Previously a failing lame run went completely unnoticed.
        print('*** lame failed with exit status %s, keeping %s ***'
              % (retcode, wav_file))
    return retcode


if __name__ == '__main__':
    # Hand main()'s return value to the shell as the exit status.
    sys.exit(main())
Anhang herunterladen

Diese Revision wurde am 26. November 2010 11:40 von busfahrer erstellt.