Socket Programming for Multicore Hardware

CS 441 Lecture, Dr. Lawlor

Here's some code to do client-server socket based parallelism on a multicore machine. Note that unlike with threads, "var" is NOT shared between client and server.

// Socket-based multicore parallelism
#include "osl/socket.h"
#include "osl/socket.cpp"
#include <sys/wait.h> /* for wait() */
#include <unistd.h> /* for fork() */

int var=0;

/* Code executed by the child process.
   s: socket connected to the parent (not used by this demo).
   Prints var, sets it to 1, and prints it again. */
void run_child(SOCKET s) {
	cout<<"Child alive! cvar="<<var;
	cout<<"   CCCCCCC\n";

	var=1; /* the lecture's point: this write is not seen by the parent */

	cout<<"Child done. cvar="<<var;
	cout<<"    CCCCCCC\n";
}

/* Code executed by the parent process.
   s: socket connected to the child (not used by this demo).
   Prints var, sets it to 2, and prints it again. */
void run_parent(SOCKET s) {
	cout<<"Parent alive! pvar="<<var;
	cout<<"      pppppppppp\n";

	var=2; /* the lecture's point: this write is not seen by the child */

	cout<<"Parent done. pvar="<<var;
	cout<<"      pppppppppp\n";
}


/* Fork one child process and connect parent and child with a TCP socket
   over the loopback address.  The parent runs run_parent, the child runs
   run_child; the parent then waits for the child to exit.
   Returns 0 on success, or -1 if fork() fails. */
int foo(void) {
	/* NOTE(review): presumably port 0 asks skt_server to choose a free port
	   and write it back through the pointer -- confirm against osl/socket.h. */
	unsigned int port=0;
	SERVER_SOCKET serv=skt_server(&port);
	int newpid=fork();
	if (newpid<0) { /* fork failed: no child exists, so nobody will ever connect */
		return -1; /* bail out instead of waiting in skt_accept */
	}
	if (newpid!=0) { /* I'm the parent */
		SOCKET s=skt_accept(serv,0,0);
		run_parent(s);
		skt_close(s);
		int status=0; 
		wait(&status); /* wait for child to finish */
	} else { /* I'm the child */
		SOCKET s=skt_connect(skt_lookup_ip("127.0.0.1"),port,2);
		usleep(1000); /* slow down child, to avoid corrupted cout! */
		run_child(s);
		skt_close(s);
		exit(0); /* close out child process when done */
	}
	return 0;
}

(Try this in NetRun now!)

Here's a more complex example, where we're rendering portions of the Mandelbrot Set:

// Socket-based multicore parallelism (for dual-core machine)
#include "osl/socket.h"
#include "osl/socket.cpp"
#include <sys/wait.h> /* for wait() */
#include <unistd.h> /* for fork() */
#include <complex>

/**
 A linear (affine) function of two variables: f(x,y) = a*x + b*y + c.
*/
class linear2d_function {
public:
	double a,b,c; /* coefficient on x, coefficient on y, constant term */

	/* Build the function from its three coefficients. */
	linear2d_function(double a_,double b_,double c_)
		:a(a_), b(b_), c(c_) {}

	/* Overwrite all three coefficients. */
	void set(double a_,double b_,double c_) {
		a=a_;
		b=b_;
		c=c_;
	}

	/* Return the function's value at the point (x,y). */
	double evaluate(double x,double y) const {
		return x*a+y*b+c;
	}
};

	/* Image dimensions, in pixels. */
	const int wid=100;
	const int ht=100;
	/* Pixel -> complex plane mapping used by render_mset:
	   one pixel covers 3.0/wid units along each axis. */
	double scale=3.0/wid;
	/* Real part of c for pixel (x,y): scale*x - 1.0 */
	linear2d_function fx(scale,0.0,-1.0);
	/* Imaginary part of c for pixel (x,y): scale*y */
	linear2d_function fy(0.0,scale,0.0);

/* Compute the character for Mandelbrot Set pixel (x,y).
   Iterates z=z*z+c from z=0, with c taken from fx and fy, for at most 26
   steps.  Returns 'A' plus the number of steps completed before |z|^2
   exceeded 4.0 ('A'+26 if it never did). */
char render_mset(int x,int y) {
	typedef std::complex<double> cplx;
	const int iteration_limit=26;

	const cplx seed(fx.evaluate(x,y),fy.evaluate(x,y));
	cplx orbit(0.0);

	int steps=0;
	while (steps<iteration_limit) {
		orbit=orbit*orbit+seed;
		/* compare squared magnitude against 2^2 */
		if ((orbit.real()*orbit.real()+orbit.imag()*orbit.imag())>4.0) break;
		steps++;
	}

	return 'A'+steps;
}

/* One horizontal line of the image: wid characters, one per pixel. */
struct row {
	char data[wid];
};

/* Run as process "rank", one process among "size" others.  
   Each socket connects you with another rank: s[0] connects to rank 0.
*/
void run(int rank,int size,SOCKET *s) {
	row img[ht]; /* local copy of the final image */
	
	/* Render our piece of the image */
	int ystart=ht*rank/size, yend=ht*(rank+1)/size;
	for (int y=ystart;y<yend;y++)
	{
		for (int x=0;x<wid;x++) img[y].data[x]=render_mset(x,y);
	}
	
	if (rank>0) 
	{ /* send our partial piece to rank 0 */
		skt_sendN(s[0],&img[ystart].data[0],sizeof(row)*(yend-ystart));
	}
	else
	{ /* rank 0: receive partial pieces from other ranks */
		for (int r=1;r<size;r++) {
			skt_recvN(s[r],&img[ht*r/size].data[0],sizeof(row)*(yend-ystart));
		}
		/* Print out assembled image */
		for (int y=0;y<ht;y++) {
			for (int x=0;x<wid;x++) std::cout<<img[y].data[x];
			std::cout<<"\n";
		}
	}
}


/* Dual-core driver: fork one child, connect it to the parent with a TCP
   socket over the loopback address, and run "run" as rank 0 (parent) and
   rank 1 (child).  The parent waits for the child before returning.
   Returns 0 on success, or -1 if fork() fails. */
int foo(void) {
	/* NOTE(review): presumably port 0 asks skt_server to choose a free port
	   and write it back through the pointer -- confirm against osl/socket.h. */
	unsigned int port=0;
	const int size=2; /* dual-core machine */
	SOCKET s[size]={0}; /* s[r] connects this process to rank r */
	SERVER_SOCKET serv=skt_server(&port);
	int newpid=fork();
	if (newpid<0) { /* fork failed: no child exists, so nobody will ever connect */
		return -1; /* bail out instead of waiting in skt_accept */
	}
	if (newpid!=0) { /* I'm the parent */
		s[1]=skt_accept(serv,0,0);
		/* NOTE(review): this pause delays the parent, not the child, and only
		   rank 0 prints in run(); it looks left over from the first example. */
		usleep(1000);
		run(0,size,s);
		skt_close(s[1]);
		int status=0; 
		wait(&status); /* wait for child to finish */
	} else { /* I'm the child */
		s[0]=skt_connect(skt_lookup_ip("127.0.0.1"),port,2);
		run(1,size,s);
		skt_close(s[0]);
		exit(0); /* close out child process when done */
	}
	return 0;
}

(Try this in NetRun now!)

The above example is specialized for dual-core machines. Here's a generic multi-core version (the "run" method is identical).

/* Generic multi-core driver: fork size-1 children, each connected to the
   parent by its own TCP socket over the loopback address.  Child number
   "child" runs "run" as that rank; the parent runs as rank 0 once all
   children exist, then closes each connection and waits for every child.
   Returns 0 on success, or -1 if a fork() fails. */
int foo(void) {
	/* NOTE(review): presumably port 0 asks skt_server to choose a free port
	   and write it back through the pointer -- confirm against osl/socket.h. */
	unsigned int port=0;
	const int size=4; /* quad-core machine */
	SOCKET s[size]={0}; /* s[r] connects this process to rank r */
	SERVER_SOCKET serv=skt_server(&port);
	for (int child=1;child<size;child++) {
		int newpid=fork();
		if (newpid<0) { /* fork failed: this child will never connect */
			/* Drop the connections made so far and reap the children
			   already started, rather than waiting in skt_accept. */
			for (int started=1;started<child;started++) {
				skt_close(s[started]);
				int status=0;
				wait(&status);
			}
			return -1;
		}
		if (newpid==0) { /* I'm the child */
			s[0]=skt_connect(skt_lookup_ip("127.0.0.1"),port,2);
			run(child,size,s);
			skt_close(s[0]);
			exit(0); /* close out child process when done */
		}
		/* else I'm the parent */
		s[child]=skt_accept(serv,0,0);
	}
	/* Now that all children are created, run as parent */
	run(0,size,s);
	/* Once parent is done, collect all the children */
	for (int child=1;child<size;child++) {
		skt_close(s[child]);
		int status=0; 
		wait(&status); /* wait for child to finish */
	}
	return 0;
}

(Try this in NetRun now!)

Note that the above still doesn't let arbitrary children communicate; only parent and child. This is relatively easy to fix, by making children make connections amongst themselves.

Also note that there's nothing special about using "fork" to make the processes above; we could actually make the processes on separate *machines* entirely, and TCP socket communication would work exactly the same way. This "message-passing" distributed-memory programming style is highly portable!