# Attachment: pdf2mp3.py

#!/usr/bin/env python
# -*- coding: utf8 -*-

# ###################################################
# pdf2mp3.py - small script/program to convert a
# pdf file or ascii file (.dat, .txt) into an mp3 or wav audio file
#
# Copyright (C) 2010 Hannes Rennau
# hannes@bolding-burchard.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
# ###################################################

# LIST OF PACKAGES NEEDED:
# you need to install the following packages:
# sudo apt-get install python poppler-utils festival festvox-rablpc16k
# lame espeak 

# HOW TO USE:
# 1.create a file with the name pdf2mp3 and copy the whole content of
#   this script into it
# 2.make the file executable via:
#   >>> chmod +x pdf2mp3
# 3.copy the file to /usr/bin so the program can be used from anywhere
#   on your computer:  >>> sudo cp pdf2mp3 /usr/bin/
# 4.after that get help calling: 
#   pdf2mp3 -h
#  
# 5.EXAMPLE:
#   you want to convert yourfilename.pdf into a mp3 file, then just type:
#   pdf2mp3 -v en -f yourfilename.pdf -o yourfilename.mp3   
#   (for the english voice 'en', for german voice 'de',
#   type: espeak --voices to get list of voices available on your system)
#
#
# edited by busfahrer, November 2010

import os
import re
import string
import sys
import tempfile
from optparse import OptionParser as op
from subprocess import call

def main():
    """Parse the command line and run the conversion.

    Options are read from ``sys.argv``. The voice (-v), the input file (-f)
    and the output file (-o) are required; speed (-s) and word gap (-g) are
    optional integers that are handed on as strings.

    Returns:
        0 on success, 2 if a required option is missing or ``convert``
        reports a problem (the message is printed to stdout).
    """
    # Adjacent string literals are used instead of backslash continuations
    # inside a literal: the latter embed the indentation of the following
    # source line into the text shown to the user.
    parser = op(
        usage='%prog -s integer[optional] -g integer[optional] '
              '-v [de,en,...] -f filename[.pdf|.txt|.dat] '
              '-o filename[.wav|.mp3]',
        description='This script convertes ASCII files (basically those '
                    'files with extension .txt or .dat) or pdf files into '
                    'an mp3 (or wav) audio file.',
        version=r'$v0.5$')

    # type='int' lets optparse reject non-numeric values with a usage error.
    parser.add_option('-s', '--speed', type='int', metavar='INTEGER',
                      help='Speed in words per minute, 80 to 390, '
                           'default is 170.')
    parser.add_option('-g', '--gap', type='int', metavar='INTEGER',
                      help='Word gap.Pause between words,units of 10mS at '
                           'the default speed.')
    parser.add_option('-v', '--voice', type='string', metavar='VOICENAME',
                      help='name of the voice to be used. '
                           'type: ***espeak --voices*** to get list of '
                           'available voices on your system.')
    parser.add_option('-f', '--file', type='string',
                      metavar='SOURCEFILENAME',
                      help='input path of file to read (and late on convert '
                           'to audio file).This can be a pdf or '
                           'ascii (.txt or .dat) file. extension must be '
                           'given!')
    parser.add_option('-o', '--output', type='string',
                      metavar='OUTPUTFILENAME',
                      help='Output filename (with extension .wav or .mp3 '
                           'that the program knows which audio format you '
                           'want.)')

    options, _ = parser.parse_args()

    if options.voice is None:
        print('no voice name given, use -v voicename '
              '[type ***espeak --voices*** for list of available voices]')
        return 2
    # Without these checks a missing -f/-o reaches convert() as None and
    # fails with a traceback instead of a readable message.
    if options.file is None:
        print('no input file given, use -f filename[.pdf|.txt|.dat]')
        return 2
    if options.output is None:
        print('no output file given, use -o filename[.wav|.mp3]')
        return 2

    # The command line handed to espeak must consist of strings only.
    speed = None if options.speed is None else str(options.speed)
    gap = None if options.gap is None else str(options.gap)

    result = convert(options.file, options.output, options.voice, speed, gap)
    if result is not None:
        print(result)
        return 2
    return 0

def convert(filename_inp, filename_out, language, speed, gapless):
    """Convert a text or PDF file into a wav or mp3 audio file.

    Args:
        filename_inp: path of the source file; must exist and end in
            .txt, .dat or .pdf.
        filename_out: path of the audio file; must end in .wav or .mp3.
        language: voice name, handed on to ``convert_to_wav``.
        speed: speaking speed handed on to ``convert_to_wav``, or None.
        gapless: word gap handed on to ``convert_to_wav``, or None.

    Returns:
        None on success, otherwise a string describing the problem.

    Note:
        For PDF input the extracted text is written next to the PDF under
        the same base name with a .txt extension and removed afterwards.
    """
    # Guard against None/empty paths so callers get a message, not a
    # TypeError from os.path.
    if not filename_inp or not os.path.isfile(filename_inp):
        return '*** input file %s does not exist ***' % filename_inp

    extension_out = os.path.splitext(filename_out or '')[1]
    if extension_out not in ('.wav', '.mp3'):
        return ('please decide whether you want wav or mp3 format by '
                'typing -o filename.wav or -o filename.mp3')

    extension = os.path.splitext(filename_inp)[1]
    if extension not in ('.dat', '.txt', '.pdf'):
        return '*** input file does not have extension (.txt, .dat, .pdf) ***'

    text_file = filename_inp
    extracted_text = None
    if extension == '.pdf':
        print('converting pdf file: %s to ASCII\n' % filename_inp)
        pdf_convert_to_ascii(filename_inp)
        extracted_text = '%s.txt' % filename_inp[:-len(extension)]
        if not os.path.isfile(extracted_text):
            return ('*** could not extract text from %s (%s was not '
                    'created) ***' % (filename_inp, extracted_text))
        text_file = extracted_text

    try:
        # A helper returning None is treated as success; only a non-zero
        # status is reported as a failure.
        status = convert_to_wav(text_file, filename_out, language, speed,
                                gapless)
        if status:
            return ('*** conversion of %s to wav failed (exit status %s) ***'
                    % (text_file, status))
    finally:
        # The extracted text is only an intermediate product; remove it even
        # when the conversion raised, unless it is already gone.
        if extracted_text is not None and os.path.exists(extracted_text):
            os.remove(extracted_text)

    if extension_out == '.mp3':
        status = convert_wav_2_mp3(filename_out)
        if status:
            return ('*** conversion to %s failed (exit status %s) ***'
                    % (filename_out, status))
    return None

def pdf_convert_to_ascii(input_pdf_file):
    """Extract the text of a PDF with the external ``pdftotext`` tool.

    The text is written next to the PDF, using the same base name with a
    .txt extension.

    Args:
        input_pdf_file: path of the PDF file to read.

    Returns:
        The exit status of pdftotext (0 means success).
    """
    text_file = os.path.splitext(input_pdf_file)[0] + '.txt'
    return call(['pdftotext', input_pdf_file, text_file])

def convert_to_wav(filename_inp, filename_out, language, speed, gapless):
    """Speak the text of a file into a wav file using ``espeak``.

    The wav file gets the base name of ``filename_out`` with a .wav
    extension. The input file is only read; it is left in place.

    Args:
        filename_inp: path of the text file to read.
        filename_out: requested output path; only its base name is used.
        language: voice name passed to espeak via -v.
        speed: value for espeak's -s option, or None to omit it.
        gapless: value for espeak's -g option, or None to omit it.

    Returns:
        The exit status of espeak (0 means success).
    """
    wav_file = os.path.splitext(filename_out)[0] + '.wav'
    print('converting %s to %s\n' % (filename_inp, wav_file))

    with open(filename_inp, 'r') as infile:
        text = infile.read()

    # NOTE(review): this keeps only ASCII letters, spaces, newlines and
    # . , ! ? -- digits and non-ASCII letters (e.g. German umlauts) are
    # dropped before speaking. Left unchanged; confirm whether intended.
    edited_text = re.sub('[^a-zA-Z .,!?\n]', '', text)

    cmd = ['espeak']
    if gapless is not None:
        cmd += ['-g', str(gapless)]
    if speed is not None:
        cmd += ['-s', str(speed)]

    # A private temporary file avoids overwriting (and later deleting) a
    # file called edited_text.txt in the current working directory.
    scratch = tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                          delete=False)
    try:
        with scratch:
            scratch.write(edited_text)
        cmd += ['-v', language, '-f', scratch.name, '-w', wav_file]
        return call(cmd)
    finally:
        # Clean up even when espeak could not be started.
        os.remove(scratch.name)

def convert_wav_2_mp3(filename_out):
    """Encode the intermediate wav file as mp3 using ``lame``.

    Both file names are derived from the base name of ``filename_out``:
    the .wav file is the input and the .mp3 file is the result. The wav
    file is deleted only after lame succeeded.

    Args:
        filename_out: requested output path; only its base name is used.

    Returns:
        The exit status of lame (0 means success).
    """
    base = os.path.splitext(filename_out)[0]
    wav_file = base + '.wav'
    mp3_file = base + '.mp3'

    print('converting %s to %s\n' % (wav_file, mp3_file))
    retcode = call(['lame', '-f', wav_file, mp3_file])
    if retcode == 0:
        # German for: "File %s was created successfully".
        print('Datei %s wurde erfolgreich erstellt' % filename_out)
        os.remove(wav_file)
    else:
        # Keep the wav file so the speech output is not lost.
        print('*** lame failed with exit status %s, keeping %s ***'
              % (retcode, wav_file))
    return retcode


if __name__ == '__main__':
    # Run the command-line entry point and use its result as exit status.
    sys.exit(main())
# Download attachment
# This revision was created by busfahrer on 26 November 2010 at 11:40.