# Makefile for the optimized matrix multiply assignment


# Absolute paths to the gcc and gfortran binaries used for every compile
# and link step below.
CC := /share/apps/local/bin/gcc
FC := /share/apps/local/bin/gfortran

# Optimization flags shared by the C and Fortran compiles.  This is the
# main knob to experiment with!
OPTFLAGS := -O3
CFLAGS = $(OPTFLAGS)
FFLAGS = $(OPTFLAGS)

# Pass the compiler path and the optimization flags to the C sources as the
# string macros COMPILER and FLAGS.  CFLAGS, FFLAGS and CPPFLAGS are kept
# recursively expanded so they follow whatever CC and OPTFLAGS end up being.
CPPFLAGS  = "-DCOMPILER=\"$(CC)\""
CPPFLAGS += "-DFLAGS=\"$(OPTFLAGS)\""

# Add -DDEBUG_RUN to CPPFLAGS to cut down on the cases.

# Link inputs common to every driver:
#   LIBS - libraries appended to each link line
#   OBJS - object files that every driver is built from
LIBS := -lm -lrt
OBJS := matmul.o timing.o

# Libraries and include files for ATLAS.
# ATLAS_ROOT is the installation prefix; override it on the command line
# (e.g. `make ATLAS_ROOT=/opt/atlas`) to build against a different install.
ATLAS_ROOT := /share/apps/local/atlas
# Link flags for matmul-blas.
LIBATLAS := -L$(ATLAS_ROOT)/lib/ -lcblas -latlas
# Link flags for matmul-pblas: the "pt" CBLAS library plus pthreads.
LIBPATLAS := -L$(ATLAS_ROOT)/lib/ -lptcblas -latlas -lpthread
# Header search path used when compiling blas_dgemm.c.
INCATLAS := -I$(ATLAS_ROOT)/include

# First rule in the file, so a bare `make` builds every driver listed here.
.PHONY: all
all: matmul \
     matmul-basic \
     matmul-blocked \
     matmul-blas \
     matmul-pblas \
     matmul-f2c

# ---
# Rules to build the drivers
#
# Every driver is linked from the shared objects in $(OBJS) plus the
# object(s) providing one dgemm implementation.  The prerequisite lists
# come first; the link recipes follow.

matmul:         $(OBJS) dgemm.o
matmul-basic:   $(OBJS) basic_dgemm.o
matmul-blocked: $(OBJS) blocked_dgemm.o
matmul-blas:    $(OBJS) blas_dgemm.o
matmul-pblas:   $(OBJS) blas_dgemm.o
matmul-f2c:     $(OBJS) f2c_dgemm.o fdgemm.o

# Drivers that need nothing beyond $(LIBS).  A multi-target rule is just
# shorthand for one rule per target, each with its own $@ and $^.
matmul matmul-basic matmul-blocked:
	$(CC) -o $@ $^ $(LDFLAGS) $(LIBS)

# Same object file as matmul-pblas, linked against $(LIBATLAS).
matmul-blas:
	$(CC) -o $@ $^ $(LDFLAGS) $(LIBS) $(LIBATLAS)

# Same object file as matmul-blas, linked against $(LIBPATLAS).
matmul-pblas:
	$(CC) -o $@ $^ $(LDFLAGS) $(LIBS) $(LIBPATLAS)

# Includes a Fortran object (fdgemm.o), so the link is driven by $(FC).
matmul-f2c:
	$(FC) -o $@ $^ $(LDFLAGS) $(LIBS)

# --
# Rules to build object files
#
# Each recipe names its output explicitly with `-o $@`.  Without it the
# compiler writes <basename>.o into the current directory, which is not
# the requested target whenever the target has a directory part.

# C sources: optimization flags plus the COMPILER/FLAGS macro definitions.
%.o: %.c
	$(CC) -c $(CFLAGS) $(CPPFLAGS) -o $@ $<

# Fortran sources: optimization flags only.
%.o: %.f
	$(FC) -c $(FFLAGS) -o $@ $<

# blas_dgemm.c additionally needs the ATLAS include path.
blas_dgemm.o: blas_dgemm.c
	$(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCATLAS) -o $@ $<

# ---
# This is just a suggestion on how to generate timing plots...  Feel
# free to improve on these, so long as you show MFlop/s v. matrix size.
# Submit matmul via qsub to generate the raw timing data.
#
# Every run-* target first builds its driver, then calls make_sge.sh with
# the driver path and a short label, and finally submits serial.qsub
# (presumably written by make_sge.sh -- confirm against the script).
# $< expands to the driver, which is the only prerequisite of each target.

.PHONY: run run-basic run-blocked run-blas run-pblas run-f2c

run: matmul
	./make_sge.sh ./$< mine
	qsub serial.qsub

run-basic: matmul-basic
	./make_sge.sh ./$< basic
	qsub serial.qsub

run-blocked: matmul-blocked
	./make_sge.sh ./$< blocked
	qsub serial.qsub

run-blas: matmul-blas
	./make_sge.sh ./$< atlas
	qsub serial.qsub

run-pblas: matmul-pblas
	./make_sge.sh ./$< patlas
	qsub serial.qsub

run-f2c: matmul-f2c
	./make_sge.sh ./$< f2c
	qsub serial.qsub

# Reduce the raw data to two columns: fields 2 and 4 of every line of
# timing.raw that contains "Size".  No rule in this file creates
# timing.raw (presumably the submitted job writes it -- confirm).
# If awk fails, the partial output is removed so that a truncated file is
# not mistaken for an up-to-date target on the next run.
timing:	timing.raw
	awk '/Size/ { print $$2 " " $$4 }' $< > $@ || { rm -f $@; exit 1; }

# Render the plot.  gnuplot reads the terminal/output settings from stdin
# and then the timing.gnuplot script, so the script is listed as a
# prerequisite: editing it regenerates the PDF.  A failed gnuplot run
# removes the partial PDF.
timing.pdf:	timing timing.gnuplot
	echo "set term pdf; set output '$@';" \
	  | gnuplot - timing.gnuplot || { rm -f $@; exit 1; }

# ---
# Housekeeping targets; none of them creates a file with its own name.

.PHONY:	clean realclean tgz

# Remove the driver executables, all object files and serial.qsub* files.
clean:
	rm -f matmul matmul-basic matmul-blocked matmul-blas matmul-pblas \
		matmul-f2c *.o
	rm -f serial.qsub*

# Everything clean removes, plus *~ backup files and the timing outputs.
realclean:	clean
	rm -f *~ timing timing.raw* timing.pdf

# Archive the cleaned tree from the parent directory as ../matmul.tgz.
# Expects this directory to be named matmul_files.  `&&` ensures tar never
# runs from the wrong directory if the cd fails; no subshell is needed
# because each recipe line already runs in its own shell.
tgz: realclean
	cd .. && tar -czf matmul.tgz matmul_files
