devlater-ruby/github_links.rb at master · oxrug/devlater-ruby · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
require 'nokogiri'
require 'httparty'
#require 'watir'

#Selenium::WebDriver::Chrome.driver_path = '/usr/lib/chromium-browser/chromedriver'

# Collects the hyperlinks of an HTML document into a Hash.
#
# @param page [String] HTML markup to parse (the script passes an HTTP
#   response body)
# @return [Hash{String => String}] stripped link text mapped to the value of
#   the link's href attribute. Anchors without an href are skipped. When
#   several anchors share the same text, the last one in the document wins.
def get_all_links_as_hash(page)
  parsed_page = Nokogiri::HTML(page)

  # '//a[@href]' is an absolute XPath, so it is evaluated once against the
  # document rather than once per node of an intermediate css('a') set.
  anchors = parsed_page.xpath('//a[@href]')

  anchors.map { |link| [link.text.strip, link['href']] }.to_h
end

# Narrows a table of links down to the entries that refer to GitHub.
#
# An entry is kept only when its link text contains "github" AND its URL
# contains "/github"; both comparisons ignore case.
#
# @param hashed_links [Hash{String => String}] link text mapped to URL
# @return [Hash{String => String}] a new Hash holding the matching pairs
def extract_github_from_hash(hashed_links)
  hashed_links.each_with_object({}) do |(label, target), github_links|
    next unless label.downcase.include?('github')
    next unless target.downcase.include?('/github')

    github_links[label] = target
  end
end

# Script entry point: ask for a tutorial URL, download that page and print
# the links whose text and target both mention GitHub.
puts 'what is the URL of the tutorial?'

# Read from $stdin explicitly: bare `gets` reads from files named in ARGV
# when the script is started with arguments, and returns nil at end of input
# (which made `gets.strip` raise NoMethodError).
url = $stdin.gets.to_s.strip
abort 'no URL given' if url.empty?

response = HTTParty.get(url)

document_links = get_all_links_as_hash(response.body)

filtered_links = extract_github_from_hash(document_links)

# Prints the Hash of link text => URL.
puts filtered_links

#def find_all_pages(url)

#  browser = Watir::Browser.new :chrome, headless: true
  # starts at the first page of the tutorial
#  browser.goto(url)
  # saves the url of the first page
#  current_url = browser.url

  # grab the page content here for later, then go to the next
  # page if it exists

#  page_content = browser.html

#  js_link = browser.link(:class, "rightArrow")
#  js_link.click if js_link

#  browser.wait_until { browser.url != current_url }

#  page_content += browser.html

#  open('content.txt', 'w') { |f|
#    f.puts page_content
#  }

#end

#find_all_pages(url)