jnfilter @ ef20913e3bb403ae46c5780fe6b6a560cb0c4479

 1import re
 2from functools import reduce
 3from typing import List, Iterator
 4from xml.etree.ElementTree import ElementTree, fromstring, tostring, register_namespace
 5
 6import httpx
 7from fastapi import FastAPI
 8from starlette.responses import Response, PlainTextResponse
 9
# ASGI application object; the route decorators further down in this file
# register their handlers on it.
app = FastAPI()

# Address of the upstream podcast RSS feed that gets downloaded and filtered.
URL = "https://jovemnerd.com.br/feed-nerdcast/"

# Maps a series key to the regular expression recognising episode titles of
# that series. The patterns are applied with ``re.match``, so they only match
# at the beginning of a title. The keys are the values listed by ``/series``.
RegexCollection = {
    "nerdcast": "NerdCast [0-9]+[a-c]* -",
    "empreendedor": "Empreendedor [0-9]+ -",
    "mamicas": "Caneca de Mamicas [0-9]+ -",
    "english": "Speak English [0-9]+ -",
    "nerdcash": "NerdCash [0-9]+ -",
    # NOTE(review): there are two spaces between "Bunker" and the episode
    # number here -- confirm this matches the titles published in the feed.
    "bunker": "Lá do Bunker  [0-9]+ -",
    "tech": "NerdTech [0-9]+ -",
    "genera": "Generacast [0-9]+ -",
}

# Register prefixes for these namespaces so that ElementTree serializes
# elements in them with the given prefix rather than an auto-generated one.
register_namespace("googleplay", "http://www.google.com/schemas/play-podcasts/1.0")
register_namespace("itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")
register_namespace("atom", "http://www.w3.org/2005/Atom")
28
29
class XMLResponse(Response):
    """Response subclass whose ``media_type`` is ``application/xml``."""

    media_type = "application/xml"
32
33
def match(title: str, series: List[str]) -> bool:
    """Tell whether ``title`` belongs to at least one of the requested series.

    Args:
        title: Episode title to test.
        series: Keys of ``RegexCollection`` naming the series to accept.
            Keys that are not present in ``RegexCollection`` are ignored.

    Returns:
        ``True`` if the pattern of any known key matches the beginning of
        ``title``; ``False`` otherwise, including when ``series`` is empty.
    """
    # Unknown keys are skipped rather than looked up with ``[]``: a plain
    # lookup raised KeyError only when no earlier key had already matched,
    # so the outcome depended on the order of the requested keys.
    patterns = (RegexCollection.get(name) for name in series)
    return any(
        pattern is not None and re.match(pattern, title) is not None
        for pattern in patterns
    )
39
40
def filter_xml(xml_str: str, series: List[str]) -> bytes:
    """Drop every feed item whose title does not belong to ``series``.

    Args:
        xml_str: The RSS document to filter; it is handed unchanged to
            ``xml.etree.ElementTree.fromstring``.
        series: Series keys forwarded to ``match`` for each item title.

    Returns:
        The filtered document as serialized by ``tostring`` with
        ``encoding='utf8'``, which produces ``bytes``.
    """
    root = fromstring(xml_str)
    for channel in root.findall("./channel"):
        # findall() returns a list, so removing children of ``channel``
        # while looping over that list is safe.
        for item in channel.findall("item"):
            # findtext() gives "" for a missing or empty <title>, so such
            # items are filtered out instead of raising on ``None``.
            if not match(item.findtext("title", default=""), series):
                channel.remove(item)

    return tostring(root, encoding='utf8', method='xml')
51
52
def filter_titles_xml(xml_str) -> Iterator[str]:
    """Yield the title of every ``channel/item`` in the feed, in document order.

    Args:
        xml_str: The RSS document; it is handed unchanged to
            ``xml.etree.ElementTree.fromstring``.

    Yields:
        One string per item. An item whose ``<title>`` is missing or empty
        yields ``""`` so that callers can always join the results as text.
    """
    root = fromstring(xml_str)
    for item in root.findall("./channel/item"):
        # findtext() never returns None here: it falls back to the default
        # for a missing element and to "" for an element without text.
        yield item.findtext("title", default="")
58
59
async def load_and_filter(series: str) -> bytes:
    """Download the feed and keep only the items of the requested series.

    Args:
        series: Comma-separated series keys, e.g. ``"nerdcast,tech"``.
            Whitespace around each key and empty entries are ignored; when
            nothing remains, the filter falls back to ``"nerdcast"``.

    Returns:
        The filtered feed exactly as returned by ``filter_xml``.
    """
    # Normalise the raw query value so that "nerdcast, tech" or a trailing
    # comma does not produce keys that can never match a known series.
    names = [part.strip() for part in (series or "").split(",")]
    names = [name for name in names if name] or ["nerdcast"]

    async with httpx.AsyncClient() as client:
        response = await client.get(URL)

    # The body is fully read by now, so filtering does not need the client.
    return filter_xml(response.content, names)
67
68
async def load_titles() -> Iterator[str]:
    """Download the feed from ``URL`` and return an iterator over its item titles.

    The download happens here; the returned value is whatever
    ``filter_titles_xml`` gives back for the response body.
    """
    async with httpx.AsyncClient() as http:
        feed = await http.get(URL)
        return filter_titles_xml(feed.content)
74
# HEAD is registered with the same response class as GET so that both
# methods advertise the same content type for this path.
@app.head("/", response_class=XMLResponse)
@app.get("/", response_class=XMLResponse)
async def root(q: str = ''):
    """Serve the feed restricted to the series named in ``q``.

    Args:
        q: Comma-separated series keys taken from the query string. It is
            passed unchanged to ``load_and_filter``.

    Returns:
        The result of ``load_and_filter(q)``.
    """
    return await load_and_filter(q)
79
80
@app.get("/titles", response_class=PlainTextResponse)
async def titles():
    """Return the titles produced by ``load_titles``, one per line."""
    return "\n".join(await load_titles())
85
86
@app.get("/series")
async def titles():
    """Return the keys of ``RegexCollection`` as a list, in definition order."""
    # NOTE(review): this handler shares the name ``titles`` with the handler
    # for "/titles" defined earlier in the file. Both routes are registered by
    # their decorators, but the module-level name ends up bound to this one.
    return list(RegexCollection)