Benford's law example by neumino · Pull Request #1 · rethinkdb/rethinkdb-example-nodejs · GitHub
Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
16 changes: 16 additions & 0 deletions benford/config.sample.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
module.exports = {
rethinkdb: {
host: "localhost",
port: 28015,
db: "examples"
},
http: {
port: 3000
},
twitter: {
consumer_key: '',
consumer_secret: '',
access_token: '',
access_token_secret: ''
}
}
75 changes: 75 additions & 0 deletions benford/crawler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
var config = require(__dirname+"/config.js");

var Twit = require('twit')
// Twitter client built from the credentials found in config.js
var credentials = config.twitter;
var T = new Twit({
    consumer_key: credentials.consumer_key,
    consumer_secret: credentials.consumer_secret,
    access_token: credentials.access_token,
    access_token_secret: credentials.access_token_secret
});

var r = require('rethinkdb');

// Connection to RethinkDB; assigned once `r.connect` succeeds and then used by
// the setup steps below and by `listen()`.
var connection;

// Errors matching this pattern are expected when the script is run more than
// once (the database / table was created by a previous run).
var ALREADY_EXISTS = /already exists/;

// Log an error returned by a setup query, unless it matches `expected`.
// Setup is best-effort: we never abort here, we only make failures visible.
function reportSetupError(step, err, expected) {
    if (!err) {
        return;
    }
    if (expected && expected.test(err.message)) {
        return;
    }
    console.error("Error while " + step + ": " + err.message);
}

// Step 1: create the database, then move on to the table.
function createDatabase() {
    r.dbCreate(config.rethinkdb.db).run(connection, function(err, result) {
        // If the database already exists we get an error here; keep going
        reportSetupError("creating the database", err, ALREADY_EXISTS);
        createTable();
    });
}

// Step 2: create the `benford` table, then seed it.
function createTable() {
    r.db(config.rethinkdb.db).tableCreate('benford').run(connection, function(err, result) {
        // If the table already exists we get an error here; keep going
        reportSetupError("creating the table", err, ALREADY_EXISTS);
        seedTable();
    });
}

// Step 3: insert one counter document per digit (1 to 9), then start listening.
function seedTable() {
    var seeds = [];
    for (var i = 1; i < 10; i++) {
        // Note: We use the digit value as the primary key and save it as a string
        seeds.push({id: "" + i, value: 0});
    }
    r.db(config.rethinkdb.db).table('benford').insert(seeds).run(connection, function(err, result) {
        // If the table was already initialized, the inserts are not executed since
        // RethinkDB does not allow redundant primary keys (`id`)
        reportSetupError("seeding the table", err);
        listen();
    });
}

// Open the connection, then run the setup steps in order.
r.connect({
    host: config.rethinkdb.host,
    port: config.rethinkdb.port,
    db: config.rethinkdb.db
}, function(err, conn) {
    if (err) {
        throw new Error("Could not open a connection to rethinkdb\n"+err.message)
    }

    connection = conn;
    createDatabase();
});


// Count, for each leading digit 1-9, how many whitespace-separated words of
// `text` start with that digit. Returns an object keyed by the digit (as a
// string), e.g. "12 cats 1 dog" -> {"1": 2}. A non-string `text` yields {}.
function countLeadingDigits(text) {
    var counts = {};
    if (typeof text !== 'string') {
        return counts; // Nothing to count in a message without text
    }

    var words = text.split(/\s+/); // Split the text on white space
    for (var i = 0; i < words.length; i++) {
        if (/^[1-9]/.test(words[i])) { // The word starts with a non-zero digit
            var digit = words[i][0];
            counts[digit] = (counts[digit] || 0) + 1;
        }
    }
    return counts;
}

// Listen to Twitter's sample stream and, for every tweet, add the number of
// occurrences of each leading digit to the matching document in `benford`.
function listen() {
    // Open the stream
    var stream = T.stream('statuses/sample');

    stream.on('tweet', function (tweet) {
        var counts = countLeadingDigits(tweet && tweet.text);

        for (var digit in counts) {
            // Update the document by incrementing its value with counts[digit]
            // Note that we fire the write without expecting an answer
            r.db(config.rethinkdb.db).table('benford').get(digit).update({value: r.row("value").add(counts[digit])}).run(connection, {noreply: true});
        }
    });
}
11 changes: 11 additions & 0 deletions benford/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"name": "benford"
, "version": "0.0.1"
, "private": true
, "dependencies": {
"express": "4.0.0"
, "socket.io": "1.0.4"
, "rethinkdb": "1.13.0-0"
, "sticky-session": "0.1.0"
}
}
7 changes: 7 additions & 0 deletions benford/public/bootstrap.min.css

Large diffs are not rendered by default.

113 changes: 113 additions & 0 deletions benford/public/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
<html>
<head>
<title>RethinkDB's feed with Socket.IO - Benford's law</title>
<link rel="stylesheet" href="bootstrap.min.css">
<link rel="stylesheet" type="text/css" href="style.css">
</head>

<body>
<div class="container">
<section>
<h1>RethinkDB's feed with Socket.IO</h1>
<h2>Introduction</h2>
<p>
This little example illustrates
<a href="http://en.wikipedia.org/wiki/Benford's_law">Benford's law</a> using
Twitter's streaming API.<br>The number of occurrences of each significant digit
is computed and updated in real time.
</p>
</section>

<section>
<h2>Results</h2>

<table>
<tr>
<th>Digit value</th>
<th>Occurrences</th>
<th>Percentage</th>
<th>Expected</th>
</tr>
<tr>
<td>1</td>
<td id="occurrences_1">Loading...</td>
<td id="percentage_1">Loading...</td>
<td id="expected_1">30.1%</td>
</tr>
<tr>
<td>2</td>
<td id="occurrences_2">Loading...</td>
<td id="percentage_2">Loading...</td>
<td id="expected_2">17.6%</td>
</tr>
<tr>
<td>3</td>
<td id="occurrences_3">Loading...</td>
<td id="percentage_3">Loading...</td>
<td id="expected_3">12.5%</td>
</tr>
<tr>
<td>4</td>
<td id="occurrences_4">Loading...</td>
<td id="percentage_4">Loading...</td>
<td id="expected_4">9.7%</td>
</tr>
<tr>
<td>5</td>
<td id="occurrences_5">Loading...</td>
<td id="percentage_5">Loading...</td>
<td id="expected_5">7.9%</td>
</tr>
<tr>
<td>6</td>
<td id="occurrences_6">Loading...</td>
<td id="percentage_6">Loading...</td>
<td id="expected_6">6.7%</td>
</tr>
<tr>
<td>7</td>
<td id="occurrences_7">Loading...</td>
<td id="percentage_7">Loading...</td>
<td id="expected_7">5.8%</td>
</tr>
<tr>
<td>8</td>
<td id="occurrences_8">Loading...</td>
<td id="percentage_8">Loading...</td>
<td id="expected_8">5.1%</td>
</tr>
<tr>
<td>9</td>
<td id="occurrences_9">Loading...</td>
<td id="percentage_9">Loading...</td>
<td id="expected_9">4.6%</td>
</tr>
</table>
</section>

<section>
<h2>How it works</h2>
<p>This example is composed of two parts:
<ul>
<li>
A Node.js script listens to Twitter's sample stream, extracts the first significant
digit of each number and saves it in RethinkDB.
</li>
<li>
A Node.js server listens for database changes and broadcasts them to every
connected client using <a href="http://socket.io">Socket.io</a>.
</li>
</ul>
</p>
<p>
You can find the code on GitHub, in the
<a href="https://github.com/rethinkdb/rethinkdb-example-nodejs/tree/master/benford">node.js examples repository</a>
</p>
</section>

<script src="jquery-1.10.2.min.js"></script>
<script src="socket.io-1.0.4.js"></script>
<script src="main.js"></script>

</div>
</body>
</html>
6 changes: 6 additions & 0 deletions benford/public/jquery-1.10.2.min.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions benford/public/jquery-1.10.2.min.map

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions benford/public/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Keep the results table in sync with the server:
//  - 'all' carries the complete set of counters (an object: digit -> occurrences)
//  - 'update' carries one change ({old_val, new_val}) for a single digit
$(function() {
    var socket = io();
    var counts = {}; // Latest known number of occurrences per digit

    // Format `value` as a percentage of `total` with one decimal.
    // An empty table (total of 0) is shown as 0.0% rather than NaN%.
    function formatPercentage(value, total) {
        var ratio = total > 0 ? value / total : 0;
        return (ratio * 100).toFixed(1) + "%";
    }

    // Redraw every row: the total changes with each update, so all the
    // percentages change, not only the one of the digit that was updated.
    function render() {
        var total = 0;
        var digit;
        for (digit in counts) {
            total += counts[digit];
        }
        for (digit in counts) {
            $("#occurrences_" + digit).text(counts[digit]);
            $("#percentage_" + digit).text(formatPercentage(counts[digit], total));
        }
    }

    socket.on('all', function(alldata) {
        // Start from scratch: 'all' can be received more than once (for
        // example after a reconnection) and must not be added to old totals
        counts = {};
        for (var digit in alldata) {
            counts[digit] = alldata[digit];
        }
        render();
    });

    socket.on('update', function(data) {
        if (!data || !data.new_val) {
            return; // Nothing to display for a change without a new value
        }
        counts[data.new_val.id] = data.new_val.value;
        render();
    });
});
3 changes: 3 additions & 0 deletions benford/public/socket.io-1.0.4.js

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions benford/public/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/* Table cells: thin grey border, padded, content aligned to the right */
th,
td {
    margin: 0;
    padding: 5px 10px;
    border: 1px solid #ccc;
    text-align: right;
}

/* Page sections: side gutters and space below */
section {
    margin: 0 40px 20px;
}

/* Main title: centered with vertical breathing room */
h1 {
    margin: 30px 0;
    text-align: center;
}
63 changes: 63 additions & 0 deletions benford/server.js