Re: Free Random Password Generator

From: Bill Unruh (
Date: 10/05/04

Date: Tue, 5 Oct 2004 09:48:16 -0700

References: <> <>
     <cjq2qd$flq$> <>
     <cjsfol$i55$> <>

]Bill Unruh wrote:

]> I think what the person really wanted was a password generator for
]> passwords that had some vague chance of being memorable.

I finally found the program. This program reads a dictionary, or other
source of words (I use an English dictionary with 400,000 words)
from stdin. It then produces output of 20 random "words" which follow the
same quatrgram distribution (4 consecutive letters) as the words in the
dictionary. (dictionary is better than a text passage as the text is
heavily biased toward short repeated words, and so will the output be). The
output also comes with the bitwise "entropy" estimate for the word produced
(equal to the log base 2 of the probability of each of the fourth letters
in the quatragrams produced). This by experiment is about 3-4 bits per

It is at present set up to produce passwords of max length 8 characters.
Change MAXLEN if you want something else.

#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* This program is copyright W. Unruh, 2004 <> and is
 * released under the GPL license. This copyright notice is not to be
 * removed from any copies or derivatives of this program.

/* Read a file in from stdin, and calculate the frequency of quatrgrams
 * (four letter combinations) The word starts with C_START and ends with
 * C_END. Only lower case letters are used and only punctuation is - and '.
 * The variable node->count is the number of occurances of the first three
 * letters there are in the input list of words. The node->p[l] is how many
 * occurances of the letter numbered l occur after the first three letters.
 * Only lower case letters are used.
 * The output is a list of words, randomly made up by choosing the next
 * letter with the probability p[l]/count. The log probability is the log
 * to base 2 (bits) product of p[l]/count for each letter in the word.
 * Useage
 * wget [n] <dictionary
 * n=number of random words, together with their bit count randomness
 * estimate, based on the dictionary used. (10<=n<=100)
 * It is better to use a dictionary, as the words created tend to be
 * longer. (in text most words are short, and this ups the probability that
 * the output will have short words.)
 * Eg, the sorted unique words from Shakespeare. Or a massive dictionary or
 * word list would be useful.
 * The general result is that each letter is worth about 3 bits of
 * randomness. This undoubtedly is an underestimate, as the words in the
 * dictionary tend to be obscure (most of them). Thus the probability of
 * the quatrgrams are biased towards unusual words. Of course if an attacker
 * used the same dictionary, then this would give a pretty accurate
 * estimate of the randomness of the made up words.
 * The program uses /dev/urandom to get the random probabilities to select
 * the letters.

static const char wchars[] = "abcdefghijklmnopqrstuvwxyz-'";
     //list of lower case characters recognized by the program. All others
     //are silently ignored. Note that if you want to extend the list, or
     //use some other language, you must change also the next three
     //constants. V
#define VECSZ 30 // number of characters in list + 2
#define C_START VECSZ-2 //start of word designator.
#define C_END VECSZ-1 //end of word designator.
#define MAXLEN 8 //Max length of output word.
typedef struct node {
  unsigned count;
  unsigned p[VECSZ];
} node;
static node (*modelp)[VECSZ][VECSZ][VECSZ];
#define model (*modelp)
char cont='a';
FILE *urandom;

unsigned rrand();
int main(int argc,char *argv[])
  unsigned i, j, k, l,nout=20;

  modelp = malloc(sizeof(model)); //allocate space for the array
  if (!modelp) {
    fprintf(stderr, "no memory");

  if(argc>1) // argument is max number of output words
 // printf("%d --- %s ---- %d\n",argc,argv[1],nout);

  for (i = 0; i < VECSZ; i++) {
    for (j = 0; j < VECSZ; j++) {
      for (k = 0; k < VECSZ; k++) {
        model[i][j][k].count = 0;
        for (l = 0; l < VECSZ; l++)
          model[i][j][k].p[l] = 0;

  l = C_END;
  i = j = k = C_START;
  for (;;) {
    int ch = getc(stdin);
    const char *p;
    node *n = &model[i][j][k];
    if (ch == EOF || isspace(ch)) {
      if (l != C_END) {
        l = C_END;
        i = j = k = C_START;
      if (ch == EOF)
    p = strchr(wchars, tolower(ch));
    if (!p) //character not in set
    l = p - wchars; //number of the character
    i = j; j = k; k = l;

     int nn,nc;char input[256];
     unsigned random;
     if(urandom == (FILE *)NULL)
        fprintf(stderr,"Cannot open /dev/urandom\n");
  for (nn=0;nn<nout;nn++) {
    double prob = 0;
    i = j = k = C_START;
    for (;;) {
      node *n = &model[i][j][k];
      unsigned p = n->count;
// unsigned max = RAND_MAX - (RAND_MAX % p);
      unsigned max=0xffffffff-((0xffffffff)%p);
      unsigned r;
      if(++nc>MAXLEN) break;
      do r = rrand(); while (r >= max);
      r = r%p;
      for (l = 0; r >= n->p[l]; r -= n->p[l++])
      prob -= log((double)n->p[l]/(double)n->count)/log(2);
      if (l == C_END)
      fputc(wchars[l], stdout);
      i = j; j = k; k = l;
    printf(" [%g]\n", prob);

  return (0);

unsigned rrand()
   int i,r;
   char *cr;
   cr=(char *)&r;