diff --git a/README.md b/README.md index 6d61989..05ae813 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ **Documentation can be found at [kademlia.readthedocs.org](http://kademlia.readthedocs.org/).** -This library is an asynchronous Python implementation of the [Kademlia distributed hash table](http://en.wikipedia.org/wiki/Kademlia). It uses [Twisted](https://twistedmatrix.com) to provide asynchronous communication. The nodes communicate using [RPC over UDP](https://github.com/bmuller/rpcudp) to communiate, meaning that it is capable of working behind a [NAT](http://en.wikipedia.org/wiki/NAT). +This library is an asynchronous Python implementation of the [Kademlia distributed hash table](http://en.wikipedia.org/wiki/Kademlia). It uses the [asyncio library](https://docs.python.org/3/library/asyncio.html) in Python 3 to provide asynchronous communication. The nodes communicate using [RPC over UDP](https://github.com/bmuller/rpcudp) to communiate, meaning that it is capable of working behind a [NAT](http://en.wikipedia.org/wiki/NAT). This library aims to be as close to a reference implementation of the [Kademlia paper](http://pdos.csail.mit.edu/~petar/papers/maymounkov-kademlia-lncs.pdf) as possible. @@ -15,53 +15,36 @@ pip install kademlia ``` ## Usage -*This assumes you have a working familiarity with [Twisted](https://twistedmatrix.com).* +*This assumes you have a working familiarity with [asyncio](https://docs.python.org/3/library/asyncio.html).* -Assuming you want to connect to an existing network (run the standalone server example below if you don't have a network): +Assuming you want to connect to an existing network: ```python -from twisted.internet import reactor -from twisted.python import log +import asyncio from kademlia.network import Server -import sys -# log to std out -log.startLogging(sys.stdout) +# Create a node and start listening on port 5678 +node = Server() +node.listen(5678) -def quit(result): - print "Key result:", result - reactor.stop() +# Bootstrap the node by connecting to other known nodes, in this case +# replace 123.123.123.123 with the IP of another node and optionally +# give as many ip/port combos as you can for other nodes. +loop = asyncio.get_event_loop() +loop.run_until_complete(node.bootstrap([("123.123.123.123", 5678)])) -def get(result, server): - return server.get("a key").addCallback(quit) +# set a value for the key "my-key" on the network +loop.run_until_complete(node.set("my-key", "my awesome value")) -def done(found, server): - log.msg("Found nodes: %s" % found) - return server.set("a key", "a value").addCallback(get, server) - -server = Server() -# next line, or use reactor.listenUDP(5678, server.protocol) -server.listen(5678) -server.bootstrap([('127.0.0.1', 1234)]).addCallback(done, server) - -reactor.run() +# get the value associated with "my-key" from the network +result = loop.run_until_complete(node.get("my-key")) +print(result) ``` -Check out the examples folder for other examples. +## Initializing a Network +If you're starting a new network from scratch, just omit the `node.bootstrap` call in the example above. Then, bootstrap other nodes by connecting to the first node you started. -## Stand-alone Server -If all you want to do is run a local server, just start the example server: - -``` -twistd -noy examples/server.tac -``` - -## Running Tests -To run tests: - -``` -trial kademlia -``` +See the examples folder for a first node example that other nodes can bootstrap connect to and some code that gets and sets a key/value. ## Logging This library uses the standard [Python logging library](https://docs.python.org/3/library/logging.html). To see debut output printed to STDOUT, for instance, use: @@ -69,7 +52,7 @@ This library uses the standard [Python logging library](https://docs.python.org/ ```python import logging -log = logging.getLogger('rpcudp') +log = logging.getLogger('kademlia') log.setLevel(logging.DEBUG) log.addHandler(logging.StreamHandler()) ``` diff --git a/examples/example.py b/examples/example.py deleted file mode 100644 index be95613..0000000 --- a/examples/example.py +++ /dev/null @@ -1,31 +0,0 @@ -import logging -import asyncio - -from kademlia.network import Server - - -logging.basicConfig(level=logging.DEBUG) -loop = asyncio.get_event_loop() -loop.set_debug(True) - -server = Server() -server.listen(8468) - -def done(result): - print("Key result:", result) - -def setDone(result, server): - server.get("a key").addCallback(done) - -def bootstrapDone(found, server): - server.set("a key", "a value").addCallback(setDone, server) - -#server.bootstrap([("1.2.3.4", 8468)]).addCallback(bootstrapDone, server) - -try: - loop.run_forever() -except KeyboardInterrupt: - pass -finally: - server.stop() - loop.close() diff --git a/examples/first_node.py b/examples/first_node.py new file mode 100644 index 0000000..2800f0d --- /dev/null +++ b/examples/first_node.py @@ -0,0 +1,25 @@ +import logging +import asyncio + +from kademlia.network import Server + +handler = logging.StreamHandler() +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +handler.setFormatter(formatter) +log = logging.getLogger('kademlia') +log.addHandler(handler) +log.setLevel(logging.DEBUG) + +server = Server() +server.listen(8468) + +loop = asyncio.get_event_loop() +loop.set_debug(True) + +try: + loop.run_forever() +except KeyboardInterrupt: + pass +finally: + server.stop() + loop.close() diff --git a/examples/get.py b/examples/get.py index 7e62590..c8b0d58 100644 --- a/examples/get.py +++ b/examples/get.py @@ -8,7 +8,13 @@ if len(sys.argv) != 2: print("Usage: python get.py ") sys.exit(1) -logging.basicConfig(level=logging.DEBUG) +handler = logging.StreamHandler() +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +handler.setFormatter(formatter) +log = logging.getLogger('kademlia') +log.addHandler(handler) +log.setLevel(logging.DEBUG) + loop = asyncio.get_event_loop() loop.set_debug(True) diff --git a/examples/set.py b/examples/set.py index f561281..cf274cf 100644 --- a/examples/set.py +++ b/examples/set.py @@ -8,7 +8,13 @@ if len(sys.argv) != 3: print("Usage: python set.py ") sys.exit(1) -logging.basicConfig(level=logging.DEBUG) +handler = logging.StreamHandler() +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +handler.setFormatter(formatter) +log = logging.getLogger('kademlia') +log.addHandler(handler) +log.setLevel(logging.DEBUG) + loop = asyncio.get_event_loop() loop.set_debug(True) diff --git a/kademlia/crawling.py b/kademlia/crawling.py index a681051..33edea1 100644 --- a/kademlia/crawling.py +++ b/kademlia/crawling.py @@ -1,9 +1,11 @@ from collections import Counter -from logging import getLogger +import logging from kademlia.node import Node, NodeHeap from kademlia.utils import gather_dict +log = logging.getLogger(__name__) + class SpiderCrawl(object): """ @@ -26,8 +28,7 @@ class SpiderCrawl(object): self.node = node self.nearest = NodeHeap(self.node, self.ksize) self.lastIDsCrawled = [] - self.log = getLogger("kademlia-spider") - self.log.info("creating spider with peers: %s" % peers) + log.info("creating spider with peers: %s" % peers) self.nearest.push(peers) @@ -47,10 +48,10 @@ class SpiderCrawl(object): yet queried 4. repeat, unless nearest list has all been queried, then ur done """ - self.log.info("crawling with nearest: %s" % str(tuple(self.nearest))) + log.info("crawling with nearest: %s" % str(tuple(self.nearest))) count = self.alpha if self.nearest.getIDs() == self.lastIDsCrawled: - self.log.info("last iteration same as current - checking all in list now") + log.info("last iteration same as current - checking all in list now") count = len(self.nearest) self.lastIDsCrawled = self.nearest.getIDs() @@ -110,7 +111,7 @@ class ValueSpiderCrawl(SpiderCrawl): valueCounts = Counter(values) if len(valueCounts) != 1: args = (self.node.long_id, str(values)) - self.log.warning("Got multiple values for key %i: %s" % args) + log.warning("Got multiple values for key %i: %s" % args) value = valueCounts.most_common(1)[0][0] peerToSaveTo = self.nearestWithoutValue.popleft() diff --git a/kademlia/network.py b/kademlia/network.py index 0cdb023..41b3a8e 100644 --- a/kademlia/network.py +++ b/kademlia/network.py @@ -4,7 +4,7 @@ Package for interacting on the network at a high level. import random import pickle import asyncio -from logging import getLogger +import logging from kademlia.protocol import KademliaProtocol from kademlia.utils import digest @@ -13,6 +13,8 @@ from kademlia.node import Node from kademlia.crawling import ValueSpiderCrawl from kademlia.crawling import NodeSpiderCrawl +log = logging.getLogger(__name__) + class Server(object): """ @@ -34,7 +36,6 @@ class Server(object): """ self.ksize = ksize self.alpha = alpha - self.log = getLogger("kademlia-server") self.storage = storage or ForgetfulStorage() self.node = Node(id or digest(random.getrandbits(255))) self.transport = None @@ -61,11 +62,13 @@ class Server(object): proto_factory = lambda: self.protocol_class(self.node, self.storage, self.ksize) loop = asyncio.get_event_loop() listen = loop.create_datagram_endpoint(proto_factory, local_addr=(interface, port)) + log.info("Node %i listening on %s:%i", self.node.long_id, interface, port) self.transport, self.protocol = loop.run_until_complete(listen) # finally, schedule refreshing table self.refresh_table() def refresh_table(self): + log.debug("Refreshing routing table") asyncio.ensure_future(self._refresh_table()) loop = asyncio.get_event_loop() self.refresh_loop = loop.call_later(3600, self.refresh_table) @@ -110,6 +113,7 @@ class Server(object): addrs: A `list` of (ip, port) `tuple` pairs. Note that only IP addresses are acceptable - hostnames will cause an error. """ + log.debug("Attempting to bootstrap node with %i initial contacts", len(addrs)) cos = list(map(self.bootstrap_node, addrs)) nodes = [node for node in await asyncio.gather(*cos) if not node is None] spider = NodeSpiderCrawl(self.protocol, self.node, nodes, self.ksize, self.alpha) @@ -128,7 +132,7 @@ class Server(object): """ def handle(results): ips = [ result[1][0] for result in results if result[0] ] - self.log.debug("other nodes think our ip is %s" % str(ips)) + log.debug("other nodes think our ip is %s" % str(ips)) return ips ds = [] @@ -143,6 +147,7 @@ class Server(object): Returns: :class:`None` if not found, the value otherwise. """ + log.info("Looking up key %s", key) dkey = digest(key) # if this node has it, return it if self.storage.get(dkey) is not None: @@ -150,7 +155,7 @@ class Server(object): node = Node(dkey) nearest = self.protocol.router.findNeighbors(node) if len(nearest) == 0: - self.log.warning("There are no known neighbors to get key %s" % key) + log.warning("There are no known neighbors to get key %s" % key) return None spider = ValueSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha) return await spider.find() @@ -159,7 +164,7 @@ class Server(object): """ Set the given string key to the given value in the network. """ - self.log.debug("setting '%s' = '%s' on network" % (key, value)) + log.info("setting '%s' = '%s' on network" % (key, value)) dkey = digest(key) return await self.set_digest(dkey, value) @@ -171,12 +176,12 @@ class Server(object): nearest = self.protocol.router.findNeighbors(node) if len(nearest) == 0: - self.log.warning("There are no known neighbors to set key %s" % dkey.hex()) + log.warning("There are no known neighbors to set key %s" % dkey.hex()) return False spider = NodeSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha) nodes = await spider.find() - self.log.info("setting '%s' on %s" % (dkey.hex(), list(map(str, nodes)))) + log.info("setting '%s' on %s" % (dkey.hex(), list(map(str, nodes)))) # if this node is close too, then store here as well if self.node.distanceTo(node) < max([n.distanceTo(node) for n in nodes]): @@ -190,12 +195,13 @@ class Server(object): Save the state of this node (the alpha/ksize/id/immediate neighbors) to a cache file with the given fname. """ + log.info("Saving state to %s", fname) data = { 'ksize': self.ksize, 'alpha': self.alpha, 'id': self.node.id, 'neighbors': self.bootstrappableNeighbors() } if len(data['neighbors']) == 0: - self.log.warning("No known neighbors, so not writing to cache.") + log.warning("No known neighbors, so not writing to cache.") return with open(fname, 'wb') as f: pickle.dump(data, f) @@ -206,6 +212,7 @@ class Server(object): Load the state of this node (the alpha/ksize/id/immediate neighbors) from a cache file with the given fname. """ + log.info("Loading state from %s", fname) with open(fname, 'rb') as f: data = pickle.load(f) s = Server(data['ksize'], data['alpha'], data['id']) diff --git a/kademlia/protocol.py b/kademlia/protocol.py index c7fa9d3..0fbdf6d 100644 --- a/kademlia/protocol.py +++ b/kademlia/protocol.py @@ -1,6 +1,6 @@ import random import asyncio -from logging import getLogger +import logging from rpcudp.protocol import RPCProtocol @@ -8,6 +8,8 @@ from kademlia.node import Node from kademlia.routing import RoutingTable from kademlia.utils import digest +log = logging.getLogger(__name__) + class KademliaProtocol(RPCProtocol): def __init__(self, sourceNode, storage, ksize): @@ -15,7 +17,6 @@ class KademliaProtocol(RPCProtocol): self.router = RoutingTable(self, ksize, sourceNode) self.storage = storage self.sourceNode = sourceNode - self.log = getLogger("kademlia-protocol") def getRefreshIDs(self): """ @@ -37,12 +38,12 @@ class KademliaProtocol(RPCProtocol): def rpc_store(self, sender, nodeid, key, value): source = Node(nodeid, sender[0], sender[1]) self.welcomeIfNewNode(source) - self.log.debug("got a store request from %s, storing value" % str(sender)) + log.debug("got a store request from %s, storing '%s'='%s'", sender, key.hex(), value) self.storage[key] = value return True def rpc_find_node(self, sender, nodeid, key): - self.log.info("finding neighbors of %i in local table" % int(nodeid.hex(), 16)) + log.info("finding neighbors of %i in local table", int(nodeid.hex(), 16)) source = Node(nodeid, sender[0], sender[1]) self.welcomeIfNewNode(source) node = Node(key) @@ -93,7 +94,7 @@ class KademliaProtocol(RPCProtocol): if not self.router.isNewNode(node): return - self.log.info("never seen %s before, adding to router and setting nearby " % node) + log.info("never seen %s before, adding to router", node) for key, value in self.storage.items(): keynode = Node(digest(key)) neighbors = self.router.findNeighbors(keynode) @@ -110,10 +111,10 @@ class KademliaProtocol(RPCProtocol): we get no response, make sure it's removed from the routing table. """ if not result[0]: - self.log.warning("no response from %s, removing from router" % node) + log.warning("no response from %s, removing from router", node) self.router.removeContact(node) return result - self.log.info("got successful response from %s") + log.info("got successful response from %s", node) self.welcomeIfNewNode(node) return result