pihole.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. import requests
  4. from datetime import datetime
  5. from enum import Enum
  6. from influxdb_client import Point
  7. from pandas import DataFrame
  8. from urllib.parse import urlparse
  9. class QueryStati(Enum):
  10. Blocked = 1
  11. Forwarded = 2
  12. Cached = 3
  13. Wildcard = 4
  14. Unknown = 5
  15. class PiHole:
  16. def __init__(self, url, token):
  17. self.host = url
  18. self.url = urlparse(url)
  19. self.token = token
  20. def query(self, endpoint, params={}):
  21. url = "{}://{}/admin/{}.php".format(self.url.scheme or "http", self.url.netloc, endpoint)
  22. return requests.get(url, params=params)
  23. def request_all_queries(self, start: float, end: float):
  24. """
  25. keys[]: time, query_type, domain, client, status, destination, reply_type, reply_time, dnssec
  26. """
  27. if not self.token:
  28. raise Exception("Token required")
  29. params = {
  30. "getAllQueries": "",
  31. "from": int(start),
  32. "until": int(end),
  33. "auth": self.token
  34. }
  35. json = self.query("api_db", params=params).json()
  36. if json:
  37. return json['data']
  38. else:
  39. return []
  40. def request_summary(self):
  41. """
  42. keys:
  43. - domains_being_blocked
  44. - dns_queries_today
  45. - ads_blocked_today
  46. - ads_percentage_today
  47. - unique_domains
  48. - queries_forwarded
  49. - queries_cached
  50. - clients_ever_seen
  51. - unique_clients
  52. - dns_queries_all_types
  53. - reply_UNKNOWN
  54. - reply_NODATA
  55. - reply_NXDOMAIN
  56. - reply_CNAME
  57. - reply_IP
  58. - reply_DOMAIN
  59. - reply_RRNAME
  60. - reply_SERVFAIL
  61. - reply_REFUSED
  62. - reply_NOTIMP
  63. - reply_OTHER
  64. - reply_DNSSEC
  65. - reply_NONE
  66. - reply_BLOB
  67. - dns_queries_all_replies
  68. - privacy_level
  69. - status
  70. - gravity_last_update: file_exists, absolute, relative
  71. """
  72. json = self.query("api").json()
  73. return json
  74. def request_forward_destinations(self):
  75. if not self.token:
  76. raise Exception("Token required")
  77. params = {
  78. "getForwardDestinations": "",
  79. "auth": self.token
  80. }
  81. json = self.query("api", params=params).json()
  82. if json:
  83. return json['forward_destinations']
  84. else:
  85. return {}
  86. def request_query_types(self):
  87. if not self.token:
  88. raise Exception("Token required")
  89. params = {
  90. "getQueryTypes": "",
  91. "auth": self.token
  92. }
  93. json = self.query("api", params=params).json()
  94. if json:
  95. return json['querytypes']
  96. else:
  97. return {}
  98. def get_totals_for_influxdb(self):
  99. summary = self.request_summary()
  100. timestamp = datetime.now().astimezone()
  101. yield Point("domains") \
  102. .time(timestamp) \
  103. .tag("hostname", self.host) \
  104. .field("domain_count", summary['domains_being_blocked']) \
  105. .field("unique_domains", summary['unique_domains']) \
  106. .field("forwarded", summary['queries_forwarded']) \
  107. .field("cached", summary['queries_cached'])
  108. yield Point("queries") \
  109. .time(timestamp) \
  110. .tag("hostname", self.host) \
  111. .field("queries", summary['dns_queries_today']) \
  112. .field("blocked", summary['ads_blocked_today']) \
  113. .field("ads_percentage", summary['ads_percentage_today'])
  114. yield Point("clients") \
  115. .time(timestamp) \
  116. .tag("hostname", self.host) \
  117. .field("total_clients", summary['clients_ever_seen']) \
  118. .field("unique_clients", summary['unique_clients']) \
  119. .field("total_queries", summary['dns_queries_all_types'])
  120. yield Point("other") \
  121. .time(timestamp) \
  122. .tag("hostname", self.host) \
  123. .field("status", summary['status'] == 'enabled') \
  124. .field("gravity_last_update", summary['gravity_last_updated']['absolute'])
  125. if self.token:
  126. query_types = self.request_query_types()
  127. for key, value in query_types.items():
  128. yield Point("query_types") \
  129. .time(timestamp) \
  130. .tag("hostname", self.host) \
  131. .tag("query_type", key) \
  132. .field("value", float(value))
  133. forward_destinations = self.request_forward_destinations()
  134. for key, value in forward_destinations.items():
  135. yield Point("forward_destinations") \
  136. .time(timestamp) \
  137. .tag("hostname", self.host) \
  138. .tag("destination", key.split('|')[0]) \
  139. .field("value", float(value))
  140. def get_queries_for_influxdb(self, query_date: datetime, sample_period: int):
  141. # Get all queries since last sample
  142. end_time = query_date.timestamp()
  143. start_time = end_time - sample_period + 1
  144. queries = self.request_all_queries(start_time, end_time)
  145. timestamp = datetime.now().astimezone()
  146. df = DataFrame(queries, columns=['time', 'query_type', 'domain', 'client', 'status', 'destination', 'reply_type', 'reply_time', 'dnssec'])
  147. # we still need some stats from the summary
  148. summary = self.request_summary()
  149. yield Point("domains") \
  150. .time(timestamp) \
  151. .tag("hostname", self.host) \
  152. .field("domain_count", summary['domains_being_blocked']) \
  153. .field("unique_domains", len(df.groupby('domain'))) \
  154. .field("forwarded", len(df[df['status'] == QueryStati.Forwarded.value])) \
  155. .field("cached", len(df[df['status'] == QueryStati.Cached.value]))
  156. blocked_count = len(df[(df['status'] == QueryStati.Blocked.value) | (df['status'] == QueryStati.Wildcard.value)])
  157. queries_point = Point("queries") \
  158. .time(timestamp) \
  159. .tag("hostname", self.host) \
  160. .field("queries", len(df)) \
  161. .field("blocked", blocked_count) \
  162. .field("ads_percentage", blocked_count * 100.0 / max(1, len(df)))
  163. yield queries_point
  164. for key, client_df in df.groupby('client'):
  165. blocked_count = len(client_df[(client_df['status'] == QueryStati.Blocked.value) | (client_df['status'] == QueryStati.Wildcard.value)])
  166. clients_point = Point("clients") \
  167. .time(timestamp) \
  168. .tag("hostname", self.host) \
  169. .tag("client", key) \
  170. .field("queries", len(client_df)) \
  171. .field("blocked", blocked_count) \
  172. .field("ads_percentage", blocked_count * 100.0 / max(1, len(client_df)))
  173. yield clients_point
  174. yield Point("other") \
  175. .time(timestamp) \
  176. .tag("hostname", self.host) \
  177. .field("status", summary['status'] == 'enabled') \
  178. .field("gravity_last_update", summary['gravity_last_updated']['absolute'])
  179. for key, group_df in df.groupby('query_type'):
  180. yield Point("query_types") \
  181. .time(timestamp) \
  182. .tag("hostname", self.host) \
  183. .tag("query_type", key) \
  184. .field("queries", len(group_df))
  185. for key, group_df in df.groupby('destination'):
  186. yield Point("forward_destinations") \
  187. .time(timestamp) \
  188. .tag("hostname", self.host) \
  189. .tag("destination", key.split('|')[0]) \
  190. .field("queries", len(group_df))
  191. def get_query_logs_for_influxdb(self, query_date: datetime, sample_period: int):
  192. end_time = query_date.timestamp()
  193. start_time = end_time - sample_period + 1
  194. for data in self.request_all_queries(start_time, end_time):
  195. timestamp, query_type, domain, client, status, destination, reply_type, reply_time, dnssec = data
  196. p = Point("logs") \
  197. .time(datetime.fromtimestamp(timestamp)) \
  198. .tag("hostname", self.host) \
  199. .tag("query_type", query_type) \
  200. .field("domain", domain) \
  201. .tag("client", client) \
  202. .tag("status", QueryStati(status)) \
  203. .tag("dnssec", dnssec != 0) \
  204. .field("reply_time", reply_time)
  205. if destination:
  206. p.tag("destination", destination)
  207. yield p
  208. if __name__ == "__main__":
  209. import argparse
  210. parser = argparse.ArgumentParser(description='Export Pi-Hole statistics')
  211. parser.add_argument('--host', required=True, type=str, help='Pi-Hole host')
  212. parser.add_argument('--token', '-t', required=True, type=str, help='Pi-Hole API token')
  213. args = parser.parse_args()
  214. pihole = PiHole(host=args.host, token=args.token)
  215. points = list(pihole.get_queries_for_influxdb(datetime.now(), 600))
  216. for p in points:
  217. print(p._time, p._name, p._tags, p._fields)