-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithub_links.rb
More file actions
64 lines (43 loc) · 1.37 KB
/
github_links.rb
File metadata and controls
64 lines (43 loc) · 1.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
require 'nokogiri'
require 'httparty'
#require 'watir'
#Selenium::WebDriver::Chrome.driver_path = '/usr/lib/chromium-browser/chromedriver'
# Collects every hyperlink of an HTML document into a Hash.
#
# @param page [String] raw HTML markup, e.g. an HTTP response body
# @return [Hash{String => String}] stripped link text mapped to the link's
#   href attribute; when several links share the same text, the last one in
#   the document wins because Hash keys are unique
def get_all_links_as_hash(page)
  parsed_page = Nokogiri::HTML(page)
  # A single document-level query is enough: '//a[@href]' is an absolute
  # path, so running it on an already-selected set of anchors would only
  # repeat the same whole-document search and yield the same nodes.
  anchors = parsed_page.xpath('//a[@href]')
  anchors.map { |link| [link.text.strip, link['href']] }.to_h
end
# Keeps only the links that point at GitHub.
#
# A link qualifies when its text mentions "github" and its URL contains
# "/github"; both comparisons ignore case.
#
# @param hashed_links [Hash] link text => URL pairs
# @return [Hash] a new Hash holding the qualifying pairs in their original
#   order; the argument itself is not modified
def extract_github_from_hash(hashed_links)
  hashed_links.select do |text, href|
    # to_s keeps a nil (or otherwise non-String) text or URL from raising.
    text.to_s.downcase.include?('github') && href.to_s.downcase.include?('/github')
  end
end
# Script entry point: ask for a tutorial URL, fetch that page over HTTP and
# print the GitHub links found on it.
puts 'what is the URL of the tutorial?'
# gets returns nil at end of input (e.g. an empty or closed stdin), so coerce
# to a String before stripping instead of calling strip on nil.
url = gets.to_s.strip
abort 'No URL given.' if url.empty?

response = HTTParty.get(url)
document_links = get_all_links_as_hash(response.body)
filtered_links = extract_github_from_hash(document_links)
puts filtered_links
#def find_all_pages(url)
# browser = Watir::Browser.new :chrome, headless: true
# starts at the first page of the tutorial
# browser.goto(url)
# saves the url of the first page
# current_url = browser.url
# grab the page content here for later, then go to the next
# page if it exists
# page_content = browser.html
# js_link = browser.link(:class, "rightArrow")
# js_link.click if js_link
# browser.wait_until { browser.url != current_url }
# page_content += browser.html
# open('content.txt', 'w') { |f|
# f.puts page_content
# }
#end
#find_all_pages(url)