jnfilter @ 5d86d6c8a55258708aa0a9596b9e0c7132e58e45

 1import re
 2from functools import reduce
 3from typing import List, Iterator
 4from xml.etree.ElementTree import ElementTree, fromstring, tostring, register_namespace
 5
 6import httpx
 7from fastapi import FastAPI
 8from starlette.responses import Response, PlainTextResponse
 9
# ASGI application object; the route decorators further down register
# their handlers on it.
app = FastAPI()

# Address of the upstream podcast feed that the loaders download.
URL = "https://jovemnerd.com.br/feed-nerdcast/"
13
# Maps each series name accepted by the API to the pattern an episode title
# must start with (the patterns are applied with ``re.match``).
RegexCollection = {
    "nerdcast": r"NerdCast [0-9]+[a-c]* -",
    "empreendedor": r"Empreendedor [0-9]+ -",
    "mamicas": r"Caneca de Mamicas [0-9]+ -",
    "english": r"Speak English [0-9]+ -",
    "nerdcash": r"NerdCash [0-9]+ -",
    # Accept any run of spaces before the episode number: the pattern used to
    # require exactly two, which a single-spaced title can never satisfy.
    "bunker": r"Lá do Bunker +[0-9]+ -",
}
22
# Register the namespace prefixes globally so that serialized documents use
# these names rather than ElementTree's auto-generated ``ns0``-style ones.
for _prefix, _uri in (
    ("googleplay", "http://www.google.com/schemas/play-podcasts/1.0"),
    ("itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd"),
    ("atom", "http://www.w3.org/2005/Atom"),
):
    register_namespace(_prefix, _uri)
# Do not leave the loop variables behind as module attributes.
del _prefix, _uri
26
27
class XMLResponse(Response):
    """Response subclass whose media type is ``application/xml``."""

    media_type = "application/xml"
30
31
def match(title: str, series: List[str]) -> bool:
    """Tell whether *title* belongs to at least one of the given series.

    Args:
        title: Episode title to classify.
        series: Series names, i.e. keys of ``RegexCollection``.

    Returns:
        ``True`` if the pattern of any listed series matches the start of
        *title*, otherwise ``False`` (including when *series* is empty).

    Names that are not keys of ``RegexCollection`` are skipped rather than
    raising ``KeyError``, so one unrecognized name does not abort the check.
    """
    patterns = (RegexCollection.get(name) for name in series)
    return any(
        pattern is not None and re.match(pattern, title) is not None
        for pattern in patterns
    )
37
38
def filter_xml(xml_str: str, series: List[str]) -> bytes:
    """Drop every feed item whose title does not belong to *series*.

    Args:
        xml_str: The feed document; it is handed to ``fromstring`` unchanged.
        series: Series names forwarded to ``match`` for each item title.

    Returns:
        The remaining document serialized as UTF-8 encoded bytes.

    Items with a missing or empty ``<title>`` cannot be classified and are
    removed instead of raising.
    """
    root = fromstring(xml_str)
    for channel in root.findall("./channel"):
        # findall() returns a list, so removing children inside the loop
        # does not disturb the iteration.
        for item in channel.findall("item"):
            title = item.findtext("title")
            if not title or not match(title, series):
                channel.remove(item)

    return tostring(root, encoding='utf8', method='xml')
49
50
def filter_titles_xml(xml_str) -> Iterator[str]:
    """Yield the title of each ``<item>`` found under ``<channel>``.

    Args:
        xml_str: The feed document; it is handed to ``fromstring`` unchanged.

    Yields:
        The title text of every item that has a ``<title>`` child, in
        document order. An empty title is yielded as ``""``; items without
        a title element are skipped, so callers always receive strings.
    """
    root = fromstring(xml_str)
    for item in root.findall("./channel/item"):
        # findtext() gives None only when the element is absent and ""
        # when it is present but empty.
        title = item.findtext("title")
        if title is not None:
            yield title
56
57
async def load_and_filter(series: str) -> bytes:
    """Download the upstream feed and keep only the requested series.

    Args:
        series: Comma-separated series names. Whitespace around a name and
            empty entries are ignored; if nothing usable remains,
            ``nerdcast`` is used.

    Returns:
        The filtered feed as returned by ``filter_xml``.

    Raises:
        httpx.HTTPStatusError: If the upstream server answers with an
            unsuccessful HTTP status.
    """
    requested = [name.strip() for name in (series or "").split(",")]
    names = [name for name in requested if name] or ["nerdcast"]
    async with httpx.AsyncClient() as client:
        response = await client.get(URL)
        # Fail with a clear HTTP error instead of trying to parse an error
        # page as if it were the feed.
        response.raise_for_status()
        return filter_xml(response.content, names)
65
66
async def load_titles() -> Iterator[str]:
    """Download the upstream feed and return an iterator over its titles.

    Returns:
        The iterator produced by ``filter_titles_xml`` for the feed body.

    Raises:
        httpx.HTTPStatusError: If the upstream server answers with an
            unsuccessful HTTP status.
    """
    async with httpx.AsyncClient() as client:
        response = await client.get(URL)
        # Fail with a clear HTTP error instead of trying to parse an error
        # page as if it were the feed.
        response.raise_for_status()
        return filter_titles_xml(response.content)
72
# HEAD is declared with the same response class as GET so both verbs
# advertise the XML media type; without it HEAD would fall back to the
# framework's default response class.
@app.head("/", response_class=XMLResponse)
@app.get("/", response_class=XMLResponse)
async def root(q: str = ''):
    """Serve the podcast feed reduced to the requested series.

    Args:
        q: Comma-separated series names taken from the query string; an
            empty value lets ``load_and_filter`` pick its default.

    Returns:
        The filtered feed document produced by ``load_and_filter``.
    """
    return await load_and_filter(q)
77
78
@app.get("/titles", response_class=PlainTextResponse)
async def titles():
    """Return every title from the upstream feed as plain text, one per line."""
    return "\n".join(await load_titles())
83
84
@app.get("/series")
async def titles():
    """Return the names of all series known to ``RegexCollection``.

    NOTE(review): this function shares its name with the ``/titles`` handler
    defined earlier in the file, so the module attribute ``titles`` ends up
    bound to this one. Both routes are registered by their decorators before
    the name is rebound, but renaming this handler (e.g. to ``series``)
    would remove the shadowing; confirm nothing imports it by name first.
    """
    # Iterating a dict yields its keys in insertion order.
    return list(RegexCollection)