# Source: stackoverflow.com
import requests
from bs4 import BeautifulSoup
from packaging.version import parse as parseVersion
# Fetch the PDFBox archive index page. An explicit timeout makes a stalled
# connection raise requests.exceptions.Timeout instead of blocking forever
# (requests applies no timeout by default).
r = requests.get("https://archive.apache.org/dist/pdfbox/", timeout=30)
r.raise_for_status()  # raise on 4xx/5xx rather than parsing an error page
soup = BeautifulSoup(r.content, "html.parser")

# Collect every link whose href starts with "2.", skip the ones containing
# "RC", and strip the trailing "/" so only the bare version string remains.
versions = [
    href.rstrip("/")
    for href in (anchor.get("href") for anchor in soup.select('a[href^="2."]'))
    if "RC" not in href
]

# Sort by parsed version rather than as plain text, so that e.g. "2.0.10"
# is ordered after "2.0.9".
versions.sort(key=parseVersion)

# Bare expression: shows the list when evaluated in a REPL/notebook cell;
# it has no effect when the file is run as a script.
versions
import pandas as pd
# One row per version string, plus integer major/minor/patch columns derived
# by splitting the string on ".".
df = pd.DataFrame(versions, columns=["version"])
df[["major", "minor", "patch"]] = (
    df["version"].str.split(".", expand=True).astype(int)
)

# Order the rows numerically by component; the last row is then the highest
# version.
df = df.sort_values(by=["major", "minor", "patch"])
latest = df["version"].iloc[-1]

# Download URL of the pdfbox-app jar for that version.
url = f"https://archive.apache.org/dist/pdfbox/{latest}/pdfbox-app-{latest}.jar"

# Bare expression: shows the URL when evaluated in a REPL/notebook cell.
url