# For checkouts or to view logs, direct your SVN client to svn://svn.saintamh.org/code/azimuth/py/airports.py

#!/usr/bin/env python

"""
$Id: airports.py 2460 2013-09-14 08:54:19Z saintamh $
"""

#----------------------------------------------------------------------------------------------------------------------------------
# includes

# standards
import urlparse

# saintamh
from saintamh.struct import *
from saintamh.util.etree import xpath_all,xpath_one
from saintamh.util.html import html_etree
from saintamh.util.maps import LatLng
from saintamh.util.regexes import capture_one
from saintamh.util.scrapers import extract_js, extract_text, make_all_urls_absolute

#----------------------------------------------------------------------------------------------------------------------------------
# data structs

class RunwayInfo (struct (
        id = {
            'class': str,
            'regex': r'^(?:0?[1-9]|[12]\d|3[0-6])[LRC]?/(?:0?[1-9]|[12]\d|3[0-6])[LRC]?$',
            'coerce_value': lambda s: s and s.upper(),
            },
        latlng_at_both_ends = pair_of (LatLng),
        )):
    pass

class AirportInfo (struct (
        iata_id = uppercase_letters(3),
        icao_id = uppercase_letters(4),
        latlng = LatLng,
        runways = nullable (seq_of (RunwayInfo)),
        )):
    pass


#----------------------------------------------------------------------------------------------------------------------------------
# private scraper functions

def _fetch_airport_info_from_flightstats (iata_airport_id, fetch_html):

    airport_html = fetch_html (
        'http://www.flightstats.com/go/Airport/airportDetails.do?airport=%s' % iata_airport_id,
        user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0',
        encoding = 'UTF-8',
        )

    return {
        'latlng': LatLng (
            *tuple (
                capture_one (
                    r'\bairport%s\s*=\s*([\-\+]?(?:\d{1,2}|1\d\d)\.\d+)\s*[\n;]' % coord,
                    extract_js (airport_html),
                    )
                for coord in ('Latitude', 'Longitude')
                )
             ),
        'icao_id': capture_one (
            (r'\b ICAO\s+code \s*:\s* ([A-Z]{4}) \b', 'ix'),
            extract_text (
                xpath_one (
                    '//div[@class="overviewDetailsTable"]',
                    airport_html,
                    ),
                ),
            ),
        }


def _fetch_runways_from_asiavis (iata_airport_id, fetch_html):

    airport_page_html = fetch_html (
        'http://web.asiavis.com/srch_result.php?cari=iata&masukan=%s' % iata_airport_id,
        encoding = 'Windows-1252',
        )

    all_runway_details_urls = tuple (
        xpath_one ('.//a[contains(@href,"runwayinfo")]/@href', row_el)
        for row_el in xpath_all (
            './tbody/tr[./td/@class="infocontent"]',
            xpath_one (
                '//div[@id="runways"]/table',
                airport_page_html
                )
            )
        )

    for runway_details_url in all_runway_details_urls:

        runway_details_html = fetch_html (runway_details_url, encoding='Windows-1252')
        runway_details_url_query = urlparse.parse_qs (
            urlparse.urlparse(runway_details_url).query
            )

        yield RunwayInfo (
            id = '/'.join (runway_details_url_query[p][0] for p in ('rwyH', 'rwyL')),
            latlng_at_both_ends = (
                LatLng (
                    *tuple (
                        xpath_one (
                            './tbody/tr/td[text()="%s"]/following-sibling::td/text()' % coord,
                            runway_endpoint_table_el
                            )
                        for coord in ('Latitude', 'Longitude')
                        )
                     )
                for runway_endpoint_table_el in xpath_all (
                    '//table[./tbody/tr/td/@class="infocontent"]',
                    runway_details_html
                    )
                ),
            )


#----------------------------------------------------------------------------------------------------------------------------------
# public functions

def fetch_airport_info (iata_airport_id, http_get):

    def fetch_html (url, **kwargs):
        encoding = kwargs.pop ('encoding', None)
        html_str = http_get (url, **kwargs)
        if encoding:
            html_str = html_str.decode (encoding)
        return make_all_urls_absolute (url, html_etree(html_str))

    airport_info_kwargs = {
        'iata_id': iata_airport_id,
        'runways': [], #_fetch_runways_from_asiavis (iata_airport_id, fetch_html),
        }
    airport_info_kwargs.update (
        _fetch_airport_info_from_flightstats (iata_airport_id, fetch_html),
        )

    return AirportInfo (**airport_info_kwargs)
    

#----------------------------------------------------------------------------------------------------------------------------------