Library

web_request_json_get_return

Sends a basic GET request to the given URL and returns the response body parsed as JSON.

def web_request_json_get_return(url, *, timeout=None):
    """Send a GET request to *url* and return the JSON-decoded response body.

    Args:
        url: Address to request.
        timeout: Optional number of seconds (or ``(connect, read)`` tuple)
            passed straight to ``requests.get``. The default ``None`` keeps
            the previous behaviour of waiting without a limit.

    Returns:
        Whatever ``Response.json()`` produces for the response body.

    Raises:
        Propagates any exception raised by ``requests.get`` or by
        ``Response.json()`` (for example when the body is not valid JSON).
    """
    # The HTTP status code is not inspected; the body is decoded regardless.
    return requests.get(url, timeout=timeout).json()

web_request_html_return

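Simple HTTP GET that returns the HTML content of the response as text. This function is also called by: web_request_get_url_list, web_request_get_url_list_from_url_list, web_request_get_images_list and web_request_dom_get.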

def web_request_html_return(url, *, timeout=None):
    """Send a GET request to *url* and return the response body as text.

    Args:
        url: Address to request.
        timeout: Optional number of seconds (or ``(connect, read)`` tuple)
            passed straight to ``requests.get``. The default ``None`` keeps
            the previous behaviour of waiting without a limit.

    Returns:
        The ``text`` attribute of the response (the decoded body).

    Raises:
        Propagates any exception raised by ``requests.get``.
    """
    # The HTTP status code is not inspected; error pages are returned as-is.
    return requests.get(url, timeout=timeout).text

web_request_json_post_return

Sends a basic POST request (with no request body) to the given URL and returns the response body parsed as JSON.

def web_request_json_post_return(url, *, timeout=None):
    """Send a body-less POST request to *url* and return the JSON response.

    Args:
        url: Address to request.
        timeout: Optional number of seconds (or ``(connect, read)`` tuple)
            passed straight to ``requests.post``. The default ``None`` keeps
            the previous behaviour of waiting without a limit.

    Returns:
        Whatever ``Response.json()`` produces for the response body.

    Raises:
        Propagates any exception raised by ``requests.post`` or by
        ``Response.json()`` (for example when the body is not valid JSON).
    """
    # The HTTP status code is not inspected; the body is decoded regardless.
    return requests.post(url, timeout=timeout).json()

web_request_get_url_list

Fetches the page at the given URL, parses its HTML line by line, and returns a list of the unique URLs found in href="..." attributes.

def web_request_get_url_list(url):
    """Return the distinct ``href="..."`` values found in the page at *url*.

    The page is downloaded with ``web_request_html_return`` and scanned one
    line at a time. Only attributes written exactly as ``href="`` (double
    quotes, no spaces) are recognised. Values are returned verbatim, in
    order of first appearance.

    Args:
        url: Address of the page to download and scan.

    Returns:
        A list of unique attribute values (strings); empty when the page
        contains no matching attribute.
    """
    html = web_request_html_return(url)
    urls = []
    for line in html.split("\n"):
        # Element 0 is the text before the first 'href="'; each later
        # element begins with one attribute value, so all of them are read.
        for segment in line.split('href="')[1:]:
            # The value ends at the closing quote, or at the end of the line
            # when the quote is missing (partition never raises).
            link = segment.partition('"')[0]
            if link not in urls:
                urls.append(link)
    return urls

web_request_get_url_list_from_url_list

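Fetches every URL in the given list, parses each page's HTML line by line, and returns one combined list of the URLs found in href="..." attributes. A link that does not contain the URL of the page it was found on is prefixed with that page URL.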

def web_request_get_url_list_from_url_list(urlist):
    """Collect the ``href="..."`` values of every page listed in *urlist*.

    Each page is downloaded with ``web_request_html_return`` and scanned one
    line at a time for attributes written exactly as ``href="``. A link that
    does not contain the URL of the page it was found on is prefixed with
    that page URL; other links are kept verbatim.

    Args:
        urlist: Iterable of page addresses to download and scan.

    Returns:
        A list of unique resulting URLs in order of first appearance,
        shared across all pages.
    """
    urls = []
    for page_url in urlist:
        html = web_request_html_return(page_url)
        for line in html.split("\n"):
            # Element 0 is the text before the first 'href="'; each later
            # element begins with one attribute value.
            for segment in line.split('href="')[1:]:
                # The value ends at the closing quote, or at the end of the
                # line when the quote is missing.
                link = segment.partition('"')[0]
                # NOTE: plain concatenation is kept for backward
                # compatibility, so an absolute link to another host is
                # prefixed as well.
                resolved = link if page_url in link else f"{page_url}{link}"
                # De-duplicate on the value that is actually stored;
                # checking the raw link would let prefixed duplicates in.
                if resolved not in urls:
                    urls.append(resolved)
    return urls

web_request_get_images_list

Fetches the page at the given URL, parses its HTML line by line, and returns a list of the unique image URLs found in img src="..." attributes.

def web_request_get_images_list(url):
    """Return the distinct ``img src="..."`` values found in the page at *url*.

    The page is downloaded with ``web_request_html_return`` and scanned one
    line at a time. Only tags written exactly as ``img src="`` (``src``
    directly after ``img``, double quotes) are recognised. Values are
    returned verbatim, in order of first appearance.

    Args:
        url: Address of the page to download and scan.

    Returns:
        A list of unique image source values (strings); empty when the page
        contains no matching attribute.
    """
    html = web_request_html_return(url)
    urls = []
    for line in html.split("\n"):
        # Element 0 is the text before the first 'img src="'; each later
        # element begins with one attribute value, so all of them are read.
        for segment in line.split('img src="')[1:]:
            # The value ends at the closing quote, or at the end of the line
            # when the quote is missing (partition never raises).
            source = segment.partition('"')[0]
            if source not in urls:
                urls.append(source)
    return urls

web_request_dom_get

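Fetches the page at the given URL and returns a list of the unique text fragments that follow the given `dom` marker string (for example `class="`), each cut at the next double quote.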

def web_request_dom_get(url, dom):
    """Return the text that follows each occurrence of *dom* in the page.

    The page at *url* is downloaded with ``web_request_html_return`` and
    scanned one line at a time. For every occurrence of the marker string
    *dom*, the characters after it up to the next double quote are
    collected.

    Args:
        url: Address of the page to download and scan.
        dom: Non-empty marker string to search for; typically an attribute
            prefix ending in an opening double quote.

    Returns:
        A list of unique fragments (strings) in order of first appearance.

    Raises:
        ValueError: If *dom* is empty (an empty marker cannot be split on).
    """
    if not dom:
        # str.split("") raises ValueError anyway; fail before the download
        # and with a message that names the offending argument.
        raise ValueError("dom must be a non-empty string")

    html = web_request_html_return(url)
    urls = []
    for line in html.split("\n"):
        # Element 0 is the text before the first marker; each later element
        # begins right after one occurrence of the marker.
        for segment in line.split(dom)[1:]:
            # The fragment ends at the next double quote, or at the end of
            # the line when there is none (partition never raises).
            value = segment.partition('"')[0]
            if value not in urls:
                urls.append(value)
    return urls