# searx/engines/bandcamp.py

from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import quote

# Result categories this engine contributes to.
categories = ['music']

# Base search endpoint. It must not carry surrounding whitespace, because
# the value is concatenated verbatim into every request URL below.
url = 'https://bandcamp.com/search'
# Request URL template; `query` and `pageno` are substituted via str.format().
search_url = url + '?q={query}&page={pageno}'

def clean_url(url):
    """Return `url` truncated just before its first '?' character.

    A string without any '?' is returned unchanged.
    """
    base, _separator, _query = url.partition('?')
    return base

def request(query, params):
    """Set the outgoing search URL on `params` and return it.

    The query is percent-encoded with `quote` and substituted, together
    with ``params['pageno']``, into the module-level `search_url` template.
    `params` is modified in place and also returned.
    """
    encoded_query = quote(query)
    page_number = params['pageno']
    params['url'] = search_url.format(query=encoded_query, pageno=page_number)
    return params

def response(resp):
    """Parse a Bandcamp search results page into a list of result dicts.

    Every ``<li>`` whose class starts with "searchresult" is examined.
    Entries whose item type is "Fan" are skipped, as are malformed entries
    that lack the result-info block or a heading link. Each remaining entry
    yields a dict with the keys ``url``, ``title``, ``image``, ``subhead``,
    ``type`` and ``template``.

    ``image`` is ``None`` when the entry has no artwork node, so a single
    art-less entry no longer aborts the whole result list with IndexError.
    """
    results = []

    dom = html.fromstring(resp.text.encode('utf-8'))

    for result in dom.xpath('//li[starts-with(@class, "searchresult")]'):
        info_nodes = result.xpath('div[@class="result-info"]')
        if not info_nodes:
            # Without the info block there is no title or link to report.
            continue
        result_info = info_nodes[0]

        result_type = extract_text(result_info.xpath('div[@class="itemtype"]/text()')).capitalize()
        if result_type == 'Fan':
            continue

        hrefs = result_info.xpath('div[@class="heading"]/a/@href')
        if not hrefs:
            # A result without a target URL is unusable; skip it.
            continue

        title = extract_text(result_info.xpath('div[@class="heading"]/a/text()'))
        subhead = extract_text(result_info.xpath('div[@class="subhead"]/text()'))
        # Drop the query string from the link.
        href = clean_url(hrefs[0])

        # Artwork is optional: fall back to None instead of raising.
        images = result.xpath('a[@class="artcont"]/div[@class="art"]/img/@src')
        image = images[0] if images else None

        results.append({'url': href,
                        'title': title,
                        'image': image,
                        'subhead': subhead,
                        'type': result_type,
                        'template': 'bandcamp.html'})

    return results