Thursday, March 27, 2008

Why am I obsessed by grey codes on the n-cube?

http://www.research.att.com/~njas/sequences/A003042
http://www.research.att.com/~njas/sequences/A091299
http://www.research.att.com/~njas/sequences/A003043
http://www.research.att.com/~njas/sequences/A066037

And check out who contributed a(5) on this one:
http://www.research.att.com/~njas/sequences/A006070

Yes, for years this comes up again in my mind: how many grey codes are there in the n cube? (Otherwise known as Hamilton(ian) paths through the n-cube.) This is another one of those problems that grips me every few years. I am gripped again. My C code version runs pretty fast, but I know a(6) will take a looooong time to run. I've written a java version that serializes intermediate subproblems to solve (opening up the possibility for a distributed computing approach) but I think I've reached the end of the brute force approach.

Here's some code:

***BEGIN***

#include
#include
#include
#include

#define ALL_DESTINATIONS 255
#define STARTER_DEPTH 3

typedef unsigned char byte;

static byte size;
static byte destination = ALL_DESTINATIONS;
static unsigned int length;
static byte terminalPosition;
static byte* nodes;
static bool* nodeIndex;
static unsigned long long* sequenceCount;
static byte* edgeMasks;
static unsigned int forkDepth = 0;

void generateCodes(byte pos) {

byte node = nodes[pos];

pos++;

byte i = 0;

for(i = 0; i < size; i++) {

byte next = (node ^ edgeMasks[i]);

if(nodeIndex[next] == 0) {

if((pos == terminalPosition)) {
if(destination == ALL_DESTINATIONS || destination == next) {
nodes[pos] = next;
sequenceCount[next]++;
}
} else {
nodeIndex[next] = 1;
nodes[pos] = next;
generateCodes(pos);
nodeIndex[next] = 0;
}
}
}
}

int makeFork(unsigned int pos, char** fileName) {

if(pos == forkDepth) {

int pid = fork();

unsigned int j = 0;

if(pid == 0) {
*fileName = (char*)malloc(sizeof(char*) * pos * 3);
char* nodeString = (char*)malloc(sizeof(char*) * 3);

for(;j <= pos; j++) {
sprintf(nodeString, "%d", nodes[j]);
strcat(*fileName, nodeString);
strcat(*fileName, "-");
}
return 1;
} else {
printf("Forked %u:\t", pid);
for(;j <= pos; j++) {
printf("%d\t", nodes[j]);
}
printf("\n");
return 0;
}
} else {

byte node = nodes[pos];

pos++;

byte i = 0;

unsigned int stop = 0;

for(; !stop && i < size; i++) {

byte next = (node ^ edgeMasks[i]);

if(nodeIndex[next] == 0) {

if(destination != next) {
nodeIndex[next] = 1;
nodes[pos] = next;
stop = makeFork(pos, fileName);

if(!stop)
nodeIndex[next] = 0;
}
}
}
return stop;
}
}

int main(int argc, char* argv[]) {

if(argc >= 2) {
size = atoi(argv[1]);
} else {
size = 4;
}

forkDepth = STARTER_DEPTH;

if(argc >= 3) {
forkDepth += atoi(argv[2]);
}

if(argc >= 4) {
destination = atoi(argv[3]);
}

length = 1 << size;

nodes = (byte*)calloc(length, sizeof(byte));
nodeIndex = (bool*)calloc(length, sizeof(bool));
sequenceCount = (unsigned long long*)calloc(length, sizeof(unsigned long long));
edgeMasks = (byte*)calloc(size, sizeof(byte));

byte i = 0;

for(; i < length; i++) {
nodes[i] = 0;
nodeIndex[i] = 0;
sequenceCount[i] = 0L;
}

byte mask = 1;

for(i = 0; i < size; i++) {
edgeMasks[i] = mask;
mask <<= 1;
}

unsigned int index = 0;
unsigned int indexMask = 1;
for(i = 0; i < STARTER_DEPTH; i++) {
nodeIndex[index] = 1;
nodes[i] = index;
index ^= indexMask<<i;
}


terminalPosition = length - 1;

char* fileName = 0;

makeFork(STARTER_DEPTH-1, &fileName);
printf(fileName);

if(fileName) {

struct timespec start, stop;
clock_gettime(CLOCK_REALTIME,&start);
generateCodes(forkDepth);
clock_gettime(CLOCK_REALTIME,&stop);
double run_time = (stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000$

FILE* outFile = fopen(fileName, "w");
fprintf(outFile, "Runtime: %f\n", run_time);

unsigned long total = 0;
unsigned long cyclical = 0;
unsigned long acyclical = 0;
for(i = 0; i < length; i++) {
unsigned long count = sequenceCount[i];
if(count != 0) {
fprintf(outFile, "%d: %u\n", i, count);
total+=count;
}
}

for(i = 0; i < size; i++) {
cyclical += sequenceCount[1<<i];
sequenceCount[1<<i] = 0;
}

for(i = 0; i < length; i++) {
acyclical += sequenceCount[i];
}

unsigned long long actual_total = total * size * (size - 1) * 1 << size;
unsigned long long actual_cyclical = cyclical * size * (size - 1) * 1 << size;
unsigned long long actual_acyclical = acyclical * size * (size - 1) * 1<<size;

fprintf(outFile, "cyclical:\t%llu\t%llu\n", cyclical, actual_cyclical);
fprintf(outFile, "acyclical:\t%llu\t%llu\n", acyclical, actual_acyclical);
fprintf(outFile, "total:\t%llu\t%llu\n", total, actual_total);
fprintf(outFile, "\n");

fflush(outFile);
fclose(outFile);

free(fileName);

}

free(nodes);
free(nodeIndex);
free(sequenceCount);
free(edgeMasks);

return 0;
}


***END***

Just looking over the results for n=4 shows lots of symmetry and redundancy that can no doubt be exploited to avoid lots of processing. This round of obsessioon I'm going to ferret that out and conquer n=6....dammit.