Library
web_request_json_get_return
Sends a basic GET request to the given URL and returns the response body parsed as JSON.
def web_request_json_get_return(url):
    """Send an HTTP GET request to ``url`` and return the reply decoded as JSON.

    Args:
        url: Address to request.

    Returns:
        The value produced by calling ``.json()`` on the response.
    """
    response = requests.get(url)
    return response.json()
web_request_html_return
Performs a simple HTTP GET and returns the response body as text (the HTML content). This function is also called by the page-scanning helpers below, such as web_request_get_url_list.
def web_request_html_return(url):
    """Send an HTTP GET request to ``url`` and return the reply body as text.

    Args:
        url: Address to request.

    Returns:
        The ``.text`` attribute of the response (the page's HTML as a string).
    """
    response = requests.get(url)
    return response.text
web_request_json_post_return
Sends a basic POST request (with no request body) to the given URL and returns the response body parsed as JSON.
def web_request_json_post_return(url):
    """Send an HTTP POST request to ``url`` and return the reply decoded as JSON.

    The request is sent without a body or extra parameters.

    Args:
        url: Address to post to.

    Returns:
        The value produced by calling ``.json()`` on the response.
    """
    response = requests.post(url)
    return response.json()
web_request_get_url_list
Parses through HTML line by line and returns a list of urls.
def web_request_get_url_list(url):
    """Fetch the page at ``url`` and list the unique ``href="..."`` values in it.

    The HTML is scanned as plain text, one line at a time; it is not parsed.
    Only attributes written exactly as ``href="`` (double-quoted, no spaces
    around ``=``) are recognised.

    Args:
        url: Address of the page, downloaded via ``web_request_html_return``.

    Returns:
        A list of the attribute values in order of first appearance, without
        duplicates. Values are returned exactly as written in the page, so
        relative links stay relative.
    """
    html = web_request_html_return(url)
    urls = []
    seen = set()  # O(1) duplicate check; ``urls`` keeps first-seen order
    for line in html.split("\n"):
        # split() yields [text before the first href, rest after href #1,
        # rest after href #2, ...]. Every piece from index 1 onward starts
        # with an attribute value, so none of them may be skipped.
        for piece in line.split('href="')[1:]:
            # The value runs up to the closing quote. partition() returns the
            # whole remainder when the quote is missing on this line instead
            # of indexing past the end of the string.
            link = piece.partition('"')[0]
            if link not in seen:
                seen.add(link)
                urls.append(link)
    return urls
web_request_get_url_list_from_url_list
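Fetches every URL in the given list, scans each page's HTML line by line, and returns one combined list of the links found. Links that do not already contain the URL of the page they were found on are combined with that URL to form a full address.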
def web_request_get_url_list_from_url_list(urlist):
    """Fetch every page in ``urlist`` and list the unique links found on them.

    Each page's HTML is scanned as plain text, one line at a time, for
    attributes written exactly as ``href="`` (double-quoted). Every value is
    resolved against the URL of the page it was found on, so relative links
    become absolute and links that are already absolute are kept unchanged.

    Args:
        urlist: Iterable of page addresses; each one is downloaded via
            ``web_request_html_return``.

    Returns:
        A list of resolved URLs in order of first appearance across all
        pages, without duplicates.
    """
    # Local import so the function does not depend on the module's import block.
    from urllib.parse import urljoin

    urls = []
    seen = set()  # O(1) duplicate check; ``urls`` keeps first-seen order
    for page_url in urlist:
        html = web_request_html_return(page_url)
        for line in html.split("\n"):
            # Every piece after index 0 starts with an href value, so all of
            # them are examined (a step of 2 would drop every second link).
            for piece in line.split('href="')[1:]:
                # partition() tolerates a missing closing quote on this line.
                raw_link = piece.partition('"')[0]
                # urljoin() leaves absolute URLs alone and resolves relative
                # ones; plain concatenation would glue the page URL in front
                # of links to other hosts and mangle relative paths.
                link = urljoin(page_url, raw_link)
                # De-duplicate on the resolved URL, i.e. on what is stored.
                if link not in seen:
                    seen.add(link)
                    urls.append(link)
    return urls
web_request_get_images_list
Parses through HTML line by line and returns a list of image urls.
def web_request_get_images_list(url):
    """Fetch the page at ``url`` and list the unique ``img src="..."`` values.

    The HTML is scanned as plain text, one line at a time; it is not parsed.
    Only tags where ``src`` directly follows ``img`` and is double-quoted
    (the literal text ``img src="``) are recognised.

    Args:
        url: Address of the page, downloaded via ``web_request_html_return``.

    Returns:
        A list of the ``src`` values in order of first appearance, without
        duplicates. Values are returned exactly as written in the page.
    """
    html = web_request_html_return(url)
    urls = []
    seen = set()  # O(1) duplicate check; ``urls`` keeps first-seen order
    for line in html.split("\n"):
        # Every piece after index 0 starts with a src value, so all of them
        # are examined (a step of 2 would drop every second image).
        for piece in line.split('img src="')[1:]:
            # The value ends at the closing quote. partition() returns the
            # whole remainder when the quote is missing on this line instead
            # of indexing past the end of the string.
            source = piece.partition('"')[0]
            if source not in seen:
                seen.add(source)
                urls.append(source)
    return urls
web_request_dom_get
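Fetches the page at the given URL and returns the unique text fragments that follow occurrences of the given marker string (dom), each read up to the next double quote on the same line.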
def web_request_dom_get(url, dom):
    """Fetch the page at ``url`` and collect the text following each ``dom`` marker.

    The HTML is scanned as plain text, one line at a time. For every
    occurrence of ``dom`` on a line, the characters between the marker and
    the next double quote are collected. For example, ``dom='class="'``
    collects the double-quoted ``class`` attribute values.

    Args:
        url: Address of the page, downloaded via ``web_request_html_return``.
        dom: Marker string to search for; the captured text starts right
            after it.

    Returns:
        A list of the captured fragments in order of first appearance,
        without duplicates.

    Raises:
        ValueError: If ``dom`` is an empty string (``str.split`` rejects an
            empty separator).
    """
    html = web_request_html_return(url)
    values = []
    seen = set()  # O(1) duplicate check; ``values`` keeps first-seen order
    for line in html.split("\n"):
        # Every piece after index 0 begins right after a marker, so all of
        # them are examined (a step of 2 would drop every second match).
        for piece in line.split(dom)[1:]:
            # The fragment ends at the next double quote. partition() returns
            # the whole remainder when there is none on this line instead of
            # indexing past the end of the string.
            fragment = piece.partition('"')[0]
            if fragment not in seen:
                seen.add(fragment)
                values.append(fragment)
    return values