#!/usr/bin/python
# npr.py - download NPR (and other public radio) stories and convert to MP3
#
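# the only positional argument is the link to download; its form depends on
# the program (see the docstrings of npr, wamu, ttbook and rtsp below).
# Run with --help for the available options.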
#
import os, sys, time, re, urllib
from optparse import OptionParser
from xml import xpath
from xml.dom import minidom
from urlparse import urlparse, urlsplit

# use pyid3lib if it's available; get it at http://pyid3lib.sourceforge.net/
# It is optional: tagmp3 checks sys.modules before touching it, so a failed
# import only costs the extra ID3 fields.
try:
    import pyid3lib
except ImportError:
    # catch only a failed import; a bare except would also hide things like
    # KeyboardInterrupt
    print("Could not import pyid3lib; continuing anyway")


def _start_command(cmd, hide_stdout=False, hide_stderr=False):
    """
    Start an external command directly (no shell) and return immediately

    @type cmd: list of strings
    @param cmd: the program name followed by its arguments
    @type hide_stdout: boolean
    @param hide_stdout: send the command's standard output to os.devnull
    @type hide_stderr: boolean
    @param hide_stderr: send the command's standard error to os.devnull
    @return: the subprocess.Popen object, or None if the program could not
    be started (a message is printed in that case)
    """
    # imported here because the module does not import subprocess itself
    import subprocess

    devnull = open(os.devnull, "w")
    try:
        out = None
        err = None
        if hide_stdout:
            out = devnull
        if hide_stderr:
            err = devnull
        try:
            return subprocess.Popen(cmd, stdout=out, stderr=err)
        except OSError:
            print("Could not run %s: %s" % (cmd[0], sys.exc_info()[1]))
            return None
    finally:
        # the child keeps its own copy of the descriptor
        devnull.close()


def _run_command(cmd, hide_stdout=False, hide_stderr=False):
    """
    Run an external command (no shell) and wait for it to finish

    Takes the same arguments as _start_command.

    @return: the exit status of the command, or 127 if it could not be
    started
    """
    proc = _start_command(cmd, hide_stdout, hide_stderr)
    if proc is None:
        return 127
    return proc.wait()


def _wait_for_fifo_reader(proc, fifo):
    """
    Wait for a process that reads from a FIFO to exit

    Call this after the writer has finished.  If the writer never opened the
    FIFO, the reader would stay blocked waiting for one; briefly opening and
    closing the write end gives the reader an end-of-file instead.

    @type proc: subprocess.Popen
    @param proc: the reading process
    @type fifo: string
    @param fifo: the filename of the FIFO the process reads from
    """
    while proc.poll() is None:
        try:
            fd = os.open(fifo, os.O_WRONLY | os.O_NONBLOCK)
        except OSError:
            # nothing has the FIFO open for reading at this moment
            pass
        else:
            os.close(fd)
        time.sleep(0.2)


def getmp3(mediaurl, result, id3data):
    """
    Download the media at mediaurl, and convert it to MP3 format

    This function uses mplayer to download the media and saves it as a .wav
    file.  When downloading is complete, the .wav file is converted to an
    .mp3 file using lame.  If FIFOs are available, normalizing was turned off
    and FIFOs were not disabled, the .wav "file" is a FIFO instead and lame
    encodes while mplayer downloads.

    The external programs are started with argument lists rather than shell
    command lines, so characters such as quotes, "?" or "&" in the URL, the
    filenames or the tag values are passed through unchanged.

    Reads the global options (nofifo, nonormalize, nodelete, verbose) and
    calls tagmp3 on the result.

    @type mediaurl: string
    @param mediaurl: the URL of the media (ex. rtsp://server.com/file.rm)
    @type result: string
    @param result: filename (minus the extension) of the resulting MP3 file
    @type id3data: dictionary
    @param id3data: data for the ID3 tag.  Should contain title, artist, and
    year, at a minimum.
    """

    wavfile = result + ".wav"
    mp3file = result + ".mp3"

    print("Downloading " + mediaurl)

    # determine whether to use a FIFO
    usingfifo = hasattr(os, "mkfifo") and not options.nofifo and             \
                options.nonormalize
    if options.verbose:
        print("using FIFO: %s" % (usingfifo,))

    if usingfifo:
        os.mkfifo(wavfile)
        if options.verbose:
            print("Created FIFO " + wavfile)

    # if using a fifo, wavfile will refer to the fifo
    lamecmd = ["lame", "--quiet", "--resample", "22.05", "--add-id3v2",
               "--tt", id3data["title"],
               "--ta", id3data["artist"],
               "--ty", id3data["year"],
               wavfile, mp3file]

    mplayercmd = ["mplayer", "-really-quiet", "-nocache",
                  "-ao", "pcm:waveheader",
                  "-ao", "pcm:file=" + wavfile,
                  mediaurl]

    # if using a FIFO, lame is started first, then mplayer, and they run
    # concurrently.  Otherwise, mplayer is started, then normalize, then lame
    if usingfifo:
        if options.verbose:
            print("Running %s & %s" % (" ".join(lamecmd),
                                       " ".join(mplayercmd)))
        lame = _start_command(lamecmd)
        # without a reader on the FIFO there is no point starting the writer
        if lame is not None:
            _run_command(mplayercmd, True, True)
            # lame must be done before the MP3 is tagged and the FIFO removed
            _wait_for_fifo_reader(lame, wavfile)
    else:
        if options.verbose:
            print("Running " + " ".join(mplayercmd))
        _run_command(mplayercmd, True, True)

        # normalize
        if not options.nonormalize:
            print("Normalizing " + wavfile)
            normalizecmd = ["normalize", "--quiet", wavfile]
            if options.verbose:
                print("Running " + " ".join(normalizecmd))
            _run_command(normalizecmd, True)

        # convert to MP3
        print("Converting %s -> %s" % (wavfile, mp3file))
        if options.verbose:
            print("Running " + " ".join(lamecmd))
        _run_command(lamecmd)

    tagmp3(mp3file, id3data)

    # delete the wav file (or fifo)
    if options.nodelete:
        print("Did not delete " + wavfile)
    elif os.path.exists(wavfile):
        # the file is missing when the download itself failed
        print("Deleting " + wavfile)
        os.remove(wavfile)


def tagmp3(mp3file, id3data):
    """
    Tag the given mp3 file with ID3 tags

    This function uses pyid3lib to tag an mp3 file, which must exist.  If
    pyid3lib is not available, it exits without an error.

    The id3data dictionary is only read, never modified.  Unicode values are
    written as ISO-8859-1; characters that cannot be represented are replaced
    rather than aborting the tagging.  Byte strings are written as they are.

    @type mp3file: string
    @param mp3file: the filename of the mp3 file to tag
    @type id3data: dictionary
    @param id3data: data for the ID3 tag.  Should contain title, artist, and
    year, at a minimum.  An optional "desc" entry becomes the comment; if it
    is missing, the title is used.
    """

    # see if pyid3lib was imported; if not, there is nothing to do
    if "pyid3lib" not in sys.modules:
        return

    def latin1(value):
        # only unicode needs converting; byte strings are passed through
        if isinstance(value, unicode):
            return value.encode("iso-8859-1", "replace")
        return value

    # write ID3V2 tag with extra fields
    tag = pyid3lib.tag(mp3file)

    # description isn't an attribute of the tag; it goes in as a comment frame
    # if no description, default to the title
    desc = id3data.get("desc")
    if desc is None:
        desc = id3data["title"]
    tag.append({"frameid":"COMM", "text":latin1(desc)})

    # set only the entries the tag object lists as attributes.
    # NOTE(review): dir() is kept from the original on purpose; presumably
    # hasattr() is false for frames that are not set yet -- confirm against
    # pyid3lib before changing it
    attributes = dir(tag)
    for key, value in id3data.items():
        if key != "desc" and key in attributes:
            setattr(tag, key, latin1(value))

    if options.verbose:
        print("Tagging MP3 using pyid3lib")
    tag.update()



def npr(url):
    """
    Download an NPR story.  This works for most NPR shows (Morning Edition,
    All Things Considered, Day to Day, Fresh Air, etc.).

    The story page is fetched for its description, the story's XML is fetched
    for the media path, title, program and date, and each MP3 listed there is
    downloaded into the current directory and tagged with tagmp3.

    @type url: string
    @param url: the URL of the story.  
     This should look like::
       http://www.npr.org/templates/story/story.php?storyId=4719492
     which is the "story page" of the story to download
    """

    if options.verbose:
        print("Handling %s as an NPR link" % url)

    # NPR's XML generator
    xmlgen = "http://www.npr.org/templates/xanadu/xplayer.php"

    # prefix for NPR media
    mediaprefix = "http://pd.npr.org/anon.npr-mp3"

    # download the story page using urllib; url looks like
    # http://www.npr.org/templates/story/story.php?storyId=4719492
    webpage = urllib.urlopen(url).read()

    # search through the webpage for the description
    result = re.search(r"""<meta\ name="description"\ content="  # lead-in
                           (?P<desc>[^"]*)                       # description
                           "                                     # end
                        """, webpage, re.VERBOSE)
    # a page without a description is not an error; tagmp3 then falls back
    # to the title
    desc = None
    if result is not None:
        desc = result.group("desc")

    # pull out the storyId; parameters without a value are skipped and only
    # the first "=" separates name from value
    queryParms = {}
    for parm in urlparse(url)[4].split("&"):
        if "=" in parm:
            name, value = parm.split("=", 1)
            queryParms[name] = value
    if "storyId" not in queryParms:
        print("No storyId found in " + url)
        return

    # assemble the path to the XML generator
    xmlurl = xmlgen + "?id=" + queryParms["storyId"] + "&t=1"
    if options.verbose:
        print("Assembled URL to story XML file is " + xmlurl)

    # download the story XML and convert to an XML DOM
    nprxml = urllib.urlopen(xmlurl).read()
    nprdoc = minidom.parseString(nprxml)

    # go through the links in the XML (there is probably just one)
    links = xpath.Evaluate("/item", nprdoc)
    for link in links:
        mediaurl = mediaprefix +                                              \
                   link.getElementsByTagName("mediaUrl")[0].firstChild.nodeValue
        title = link.getElementsByTagName("title")[0].firstChild.nodeValue
        artist = link.getElementsByTagName("program")[0].firstChild.nodeValue
        # the year is the last four characters of the date
        year = link.getElementsByTagName("date")[0].firstChild.nodeValue[-4:]

        # filename of the MP3 result (usually yymmdd_show_xx.mp3)
        resname = os.path.basename(urlsplit(mediaurl)[2])

        # download the MP3 file
        if options.verbose:
            print("Downloading  %s to %s" % (mediaurl, resname))
        urllib.urlretrieve(mediaurl, resname)

        # tag the MP3 file
        id3data = {"title":title,
                   "artist":artist,
                   "year":year,
                   "wwwaudiofile":url}
        if desc is not None:
            id3data["desc"] = desc
        tagmp3(resname, id3data)


def wamu(url):
    """
    Download a story from WAMU (Diane Rehm or Kojo Nnamdi)

    The .ram file at url is fetched; its last non-blank line, with
    surrounding whitespace removed, is taken as the address of the media.
    The show, segment and date are read from the media filename and the
    download itself is handed to getmp3.

    @type url: string
    @param url: the URL of the media.  
     This should look like::
       http://www.wamu.org/audio/dr/05/05/r1050511.ram 
     or::
       http://www.wamu.org/audio/dr/05/11/r2051121-9681.ram
     which is the "Listen to this show | Real Player" link on the story page.
     It will ultimately be converted to something like::
       http://archives.wamu.org/dr/05/03/r1050301.rm
     before calling getmp3
    """

    if options.verbose:
        print("Handling %s as a WAMU link" % url)

    # download the .ram file; the URL of the .rm file is the last line.
    # readlines() keeps line endings, so strip them and ignore blank lines
    ramfile = urllib.urlopen(url)
    try:
        lines = [line.strip() for line in ramfile.readlines()]
    finally:
        ramfile.close()
    lines = [line for line in lines if line]
    if not lines:
        print("No media URL found in " + url)
        return
    mediaurl = lines[-1]

    # dictionary of the show codes to their names, for the ID3 tags
    shownames = {"r":"The Diane Rehm Show", "k":"The Kojo Nnamdi Show"}
    prefixes = {"r":"dr", "k":"kojo"}

    # get just the filename
    name = os.path.splitext(os.path.basename(urlsplit(mediaurl)[2]))[0]

    # break apart fields in the filename   r1050511
    show = name[0:1]                     # ^ r for Diane Rehm, k for Kojo Nnamdi
    segment = name[1:2]                  #  ^
    yr = name[2:4]                       #   ^^
    month = name[4:6]                    #     ^^
    day = name[6:8]                      #       ^^

    if show not in shownames:
        print("Unknown WAMU show in media URL: " + mediaurl)
        return

    year = str(int(yr) + 2000)
    date = year + "-" + month + "-" + day
    prefix = prefixes[show]

    # the name of the .mp3 file, minus the ".mp3"
    result = year + month + day + "_" + prefix + "_" + segment

    getmp3(mediaurl, result, {"title":shownames[show],
                              "artist":date + " seg. " + segment,
                              "year":year})


def ttbook(url):
    """
    Download To the Best of our Knowledge

    The file at url is fetched; its first non-blank line, with surrounding
    whitespace removed, is taken as the address of the media.  The date and
    segment are read from the media filename and the download itself is
    handed to getmp3.

    @type url: string
    @param url: the URL of the media.  
     This should look like::
       http://clipcast.wpr.org:8080/ramgen/wpr/bok/bok050626a.rm
     which is the "Listen" link on the WPR Audio Archives page
    """

    if options.verbose:
        print("Handling %s as a TTBOOK link" % url)

    # download the file and take the first line as the address of the content.
    # readlines() keeps line endings, so strip them and ignore blank lines
    ramfile = urllib.urlopen(url)
    try:
        lines = [line.strip() for line in ramfile.readlines()]
    finally:
        ramfile.close()
    lines = [line for line in lines if line]
    if not lines:
        print("No media URL found in " + url)
        return
    mediaurl = lines[0]
    # the line will look like this:
    # rtsp://128.104.248.78:554/wpr/bok/bok060723b.rm?cloakport=8080,554,7070
    # get just the filename
    name = os.path.splitext(os.path.basename(urlsplit(mediaurl)[2]))[0]
    showname = "To the Best of Our Knowledge"

    # break apart fields in the filename   bok050626a
    year = str(int(name[3:5]) + 2000)    #    ^^
    month = name[5:7]                    #      ^^
    day = name[7:9]                      #        ^^
    segment = name[9:10]                 #          ^
    date = year + "-" + month + "-" + day

    # the name of the .mp3 file, minus the ".mp3"
    result = year + month + day + "_ttbook_" + segment

    getmp3(mediaurl, result, {"title":showname,
                              "artist":date + " seg. " + segment,
                              "year":year,
                              "wwwaudiofile":url})


def rtsp(url):
    """
    Download an rtsp:// URL.  Use when a quick-and-dirty rtsp download is needed

    The output name is the last component of the URL's path with a trailing
    ".rm" removed; a query string on the URL is not part of the name.  The
    ID3 title and artist are both set to that name and the year to the
    current year.

    @type url: string
    @param url: the URL of the media.  
     This should look like::
       rtsp://server.something.com/file.rm

    """

    if options.verbose:
        print("Handling %s as a generic RTSP link" % url)

    # determine the filename to use for the output
    # get just the filename from the path, as the other handlers do
    result = os.path.basename(urlsplit(url)[2])
    if not result:
        # no usable path component; use whatever follows the last slash
        result = url[url.rindex("/") + 1:]
    # drop the extension, but only if it really is a trailing .rm
    if result.endswith(".rm"):
        result = result[:-len(".rm")]

    getmp3(url, result, {"title":result,
                         "artist":result,
                         "year":str(time.localtime()[0])})


def main(args):
    """
    Parse the command line and hand the URL to the matching downloader

    The parsed options are stored in the global "options", which the
    download functions read.

    @type args: list of strings
    @param args: the full command line, program name first (ex. sys.argv).
    If None, sys.argv is used.
    """
    global options

    if args is None:
        args = sys.argv

    # parse the command line
    usage = "usage: %prog [options] url"
    parser = OptionParser(usage)

    parser.add_option("-n", "--no-normalize", action="store_true",
                      dest="nonormalize",
                      help="don't run normalize (allows for using a FIFO)")
    parser.add_option("-f", "--no-fifo", action="store_true", dest="nofifo",
                      help="don't use a FIFO (if it was ever an option)")
    parser.add_option("-d", "--no-delete", action="store_true",
                      dest="nodelete",
                      help="don't delete intermediate WAV file (if it exists)")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      help="print detailed messages at every step")

    # parse the arguments that were passed in (minus the program name), not
    # whatever happens to be in sys.argv
    (options, positional) = parser.parse_args(args[1:])
    if len(positional) < 1:
        parser.print_help()
        return
    url = positional[0]

    # determine what they sent; plain substring tests, so the dots in the
    # host names only match real dots
    # a bare rtsp:// URL (just download and convert)
    if url.startswith("rtsp://"):
        rtsp(url)
    # link to NPR story page:
    # http://www.npr.org/templates/story/story.php?storyId=4719492
    #            ^^^^^^^^^^^^^^^^^
    elif "npr.org/templates" in url:
        npr(url)
    # listen link from WAMU (Diane Rehm or Kojo Nnamdi Shows)
    # http://www.wamu.org/audio/dr/05/04/r2050425.ram or
    # http://www.wamu.org/audio/dr/05/11/r2051121-9681.ram
    #        ^^^^^^^^^^^^
    elif "www.wamu.org" in url:
        wamu(url)
    # listen link for TTBOOK on WPR Audio Archive page
    # http://clipcast.wpr.org:8080/ramgen/wpr/bok/bok050626a.rm
    #                              ^^^^^^^^^^
    # or maybe
    # http://broadcast.uwex.edu:8080/ramgen/wpr/bok/bok060716b.rm
    #                                ^^^^^^^^^^
    elif "ramgen/wpr" in url:
        ttbook(url)
    else:
        print("Unknown link type: " + url)

    print("Done.")


# when run as a script, hand the command line to main() and exit with
# whatever it returns
if "__main__" == __name__:
    status = main(sys.argv)
    sys.exit(status)
