// rdma_common.c

// Program : rdma_common.c
// Author : Sousuke Kanamoto

#include "rdma_common.h"

/*
 * GPU-side helpers defined outside this translation unit. Their bodies are
 * not visible here (presumably a CUDA source file -- TODO confirm); the
 * functions in this file only forward their arguments to them.
 */
/* Called by rdma_buffer_alloc_gpu(); returns the memory region handed back to the caller. */
extern struct ibv_mr* rdma_gpubuffer_alloc(struct ibv_pd *pd, uint32_t length, enum ibv_access_flags permission);
/* Called by rdma_buffer_alloc_gpu_adress(); same as above but also takes a caller-supplied address. */
extern struct ibv_mr* rdma_gpubuffer_alloc_adress(struct ibv_pd *pd, void* addr, uint32_t length, enum ibv_access_flags permission);
/* Called by rdma_buffer_free_gpu(); takes no arguments. */
extern void rdma_gpubuffer_free();
/* Called by free_gpubuffer_rdma() with the memory region to release. */
extern void rdma_gpubuffer_free_addr(struct ibv_mr* mr);
/* Called by write_rdma_buffer(). */
extern void kernel_start();
/* Called by write_rdma_buffer_addr() with the caller-supplied address. */
extern void kernel_start_addr(void *addr);
/*
 * Called by cudaAlloc().
 * NOTE(review): cudaAlloc() passes a `void**` here while this prototype says
 * `void*` -- confirm which one matches the real definition.
 */
extern int cuAlloc(void* addr, int length);
/* Called by cudaFreeAddr(). */
extern int cuFree(void* addr);
/* Called by cudaCopy(); copies `size` bytes (direction/memory space not visible here -- TODO confirm). */
extern int cuCopy(void* dst, void* src, size_t size);

/*
 * Print a short description of an RDMA connection-manager id to stdout.
 *
 * id: the cm id to describe. When NULL, an error is reported through
 *     rdma_error() and nothing else is printed.
 *
 * The device line is printed only when both the verbs context and its device
 * pointer are set; the channel/QP line only when an event channel is attached.
 */
void show_rdma_cmid(struct rdma_cm_id *id)
{
	if(!id){
		rdma_error("Passed ptr is NULL\n");
		return;
	}
	/* %p is only defined for void* arguments, so every pointer is cast. */
	printf("RDMA cm id at %p \n", (void *) id);
	if(id->verbs && id->verbs->device)
		printf("dev_ctx: %p (device name: %s) \n", (void *) id->verbs, 
				id->verbs->device->name);
	if(id->channel){
		printf("cm event channel %p\n", (void *) id->channel);
		/* %x and %u expect unsigned int; make the conversions explicit. */
		printf("QP: %p, port_space %x, port_num %u \n", (void *) id->qp, 
			(unsigned int) id->ps,
			(unsigned int) id->port_num);
	}
}

/*
 * Print the address, length and local stag of a buffer attribute record to
 * stdout, framed by two separator lines.
 *
 * attr: record to print. When NULL, an error is reported through
 *       rdma_error() and nothing is printed.
 */
void show_rdma_buffer_attr(struct rdma_buffer_attr *attr){
	const char *separator =
		"--------------------------------------------------------------\n";
	void *buf_addr;
	unsigned int buf_len;

	if(attr == NULL){
		rdma_error("Passed attr is NULL\n");
		return;
	}

	/* Convert the fields once so the format call below stays readable. */
	buf_addr = (void*) attr->address;
	buf_len = (unsigned int) attr->length;

	printf("%s", separator);
	printf("buffer attr, addr: %p , len: %u , stag : 0x%x \n", 
			buf_addr, buf_len, attr->stag.local_stag);
	printf("%s", separator);
}
/*
 * Hand a memory region to the external rdma_gpubuffer_free_addr() helper.
 *
 * mr: forwarded unchanged; no validation is performed here.
 */
void free_gpubuffer_rdma(struct ibv_mr *mr)
{
	rdma_gpubuffer_free_addr(mr);
}
/*
 * Thin wrapper around the external rdma_gpubuffer_alloc() helper.
 *
 * pd, length, permission: forwarded unchanged.
 * Returns whatever rdma_gpubuffer_alloc() returns.
 */
struct ibv_mr* rdma_buffer_alloc_gpu(struct ibv_pd *pd, uint32_t length,
    enum ibv_access_flags permission) 
{
	return rdma_gpubuffer_alloc(pd, length, permission);
}
/*
 * Thin wrapper around the external rdma_gpubuffer_alloc_adress() helper.
 *
 * pd, addr, length, permission: forwarded unchanged.
 * Returns whatever rdma_gpubuffer_alloc_adress() returns.
 */
struct ibv_mr* rdma_buffer_alloc_gpu_adress(struct ibv_pd *pd, void* addr,uint32_t length,
    enum ibv_access_flags permission) 
{
	return rdma_gpubuffer_alloc_adress(pd, addr, length, permission);
}
/*
 * Forward an allocation request to the external cuAlloc() helper.
 *
 * addr:   passed through as-is (converted implicitly to the `void*` that the
 *         cuAlloc prototype in this file declares).
 * length: passed through as-is.
 * Returns cuAlloc()'s status unchanged.
 */
int cudaAlloc(void** addr, int length)
{
	int status = cuAlloc(addr, length);

	return status;
}
/*
 * Forward a free request to the external cuFree() helper.
 *
 * addr: passed through as-is.
 * Returns cuFree()'s status unchanged.
 */
int cudaFreeAddr(void* addr)
{
	int status = cuFree(addr);

	return status;
}

/*
 * Forward a copy request to the external cuCopy() helper.
 *
 * dst, src, size: passed through as-is.
 * Returns cuCopy()'s status unchanged.
 */
int cudaCopy(void* dst, void* src, size_t size)
{
	int status = cuCopy(dst, src, size);

	return status;
}


/*
 * Register an existing buffer as a memory region in a protection domain.
 *
 * pd:         protection domain; NULL is rejected with an error message.
 * addr:       start of the buffer, handed to ibv_reg_mr() unchanged.
 * length:     buffer length, handed to ibv_reg_mr() unchanged.
 * permission: access flags, handed to ibv_reg_mr() unchanged.
 *
 * Returns the memory region from ibv_reg_mr(), or NULL when pd is NULL or
 * the registration fails.
 */
struct ibv_mr *rdma_buffer_register(struct ibv_pd *pd, 
		void *addr, uint32_t length, 
		enum ibv_access_flags permission)
{
	struct ibv_mr *region;

	if (pd == NULL) {
		rdma_error("Protection domain is NULL, ignoring \n");
		return NULL;
	}

	region = ibv_reg_mr(pd, addr, length, permission);
	if (region == NULL) {
		rdma_error("Failed to create mr on buffer, errno: %d \n", -errno);
		return NULL;
	}

	debug("Registered: %p , len: %u , stag: 0x%x \n", 
			region->addr, 
			(unsigned int) region->length, 
			region->lkey);
	return region;
}

/*
 * Invoke the external rdma_gpubuffer_free() helper.
 * Takes no arguments and returns nothing; what gets released is decided by
 * that helper.
 */
void rdma_buffer_free_gpu() 
{
	rdma_gpubuffer_free();
	return;
}

/*
 * Deregister a memory region previously registered with ibv_reg_mr().
 *
 * mr: region to deregister; NULL is reported and ignored.
 *
 * A failure of ibv_dereg_mr() is reported through rdma_error() instead of
 * being silently dropped. The function still returns void, so callers are
 * unaffected.
 */
void rdma_buffer_deregister(struct ibv_mr *mr) 
{
	int ret;

	if (!mr) { 
		rdma_error("Passed memory region is NULL, ignoring\n");
		return;
	}
	/* Log before the call: mr must not be touched once it is deregistered. */
	debug("Deregistered: %p , len: %u , stag : 0x%x \n", 
			mr->addr, 
			(unsigned int) mr->length, 
			mr->lkey);
	/* ibv_dereg_mr() returns 0 on success or an errno value on failure. */
	ret = ibv_dereg_mr(mr);
	if (ret) {
		rdma_error("Failed to deregister memory region, errno: %d \n", -ret);
	}
}

/*
 * Block for the next connection-manager event and validate it.
 *
 * echannel:       event channel to read from.
 * expected_event: the event type the caller is waiting for.
 * cm_event:       out-parameter that receives the event.
 *
 * Returns 0 when an event with zero status and the expected type was
 * received; in that case the caller owns the event and must acknowledge it
 * with rdma_ack_cm_event(). On any failure the event (if one was retrieved)
 * is acknowledged here and must not be used by the caller:
 *   - -errno when rdma_get_cm_event() fails,
 *   - the negated event status when the event carries a non-zero status,
 *   - -1 when the event is of a different type than expected.
 */
int process_rdma_cm_event(struct rdma_event_channel *echannel, 
		enum rdma_cm_event_type expected_event,
		struct rdma_cm_event **cm_event)
{
	int ret = 1;
	int saved_errno;

	ret = rdma_get_cm_event(echannel, cm_event);
	if (ret) {
		/* Capture errno first: the logging call may overwrite it. */
		saved_errno = errno;
		rdma_error("Failed to retrieve a cm event, errno: %d \n",
				-saved_errno);
		return -saved_errno;
	}
	/* lets see, if it was a good event */
	if(0 != (*cm_event)->status){
		rdma_error("CM event has non zero status: %d\n", (*cm_event)->status);
		ret = -((*cm_event)->status);
		/* important, we acknowledge the event */
		rdma_ack_cm_event(*cm_event);
		return ret;
	}
	/* if it was a good event, was it of the expected type */
	if ((*cm_event)->event != expected_event) {
		rdma_error("Unexpected event received: %s [ expecting: %s ]\n", 
				rdma_event_str((*cm_event)->event),
				rdma_event_str(expected_event));
		/* important, we acknowledge the event */
		rdma_ack_cm_event(*cm_event);
		return -1; // unexpected event :(
	}
	debug("A new %s type event is received \n", rdma_event_str((*cm_event)->event));
	/* The caller must acknowledge the event */
	return ret;
}

/*
 * Wait for a completion-queue notification and reap max_wc work completions.
 *
 * comp_channel: completion channel on which the CQ notification is awaited.
 * wc:           caller-provided array with room for at least max_wc entries.
 * max_wc:       number of work completions to collect; the CQ is polled
 *               repeatedly until that many have been gathered.
 *
 * Returns the number of work completions collected on success, or a negative
 * value on failure: a negated errno for channel/notification errors, the
 * ibv_poll_cq() return value for polling errors, or the negated WC status of
 * the first failed work completion.
 *
 * The CQ event is acknowledged as soon as it has been retrieved, so it is
 * acknowledged on every return path. An unacknowledged event would make a
 * later ibv_destroy_cq() wait forever.
 */
int process_work_completion_events (struct ibv_comp_channel *comp_channel,struct ibv_wc *wc, int max_wc)
{
	struct ibv_cq *cq_ptr = NULL;
	void *context = NULL;
	int ret = -1, i, total_wc = 0;
	int err;

	/* We wait for the notification on the CQ channel */
	ret = ibv_get_cq_event(comp_channel, /* IO channel where we are expecting the notification */ 
			&cq_ptr, /* which CQ has an activity. This should be the same as CQ we created before */ 
			&context); /* Associated CQ user context, which we did set */
	if (ret) {
		/* Capture errno first: the logging call may overwrite it. */
		err = errno;
		rdma_error("Failed to get next CQ event due to %d \n", -err);
		return -err;
	}
	/* Similar to connection management events, we need to acknowledge CQ
	 * events. Doing it right away keeps every error path below balanced. */
	ibv_ack_cq_events(cq_ptr, 
			1 /* we received one event notification. This is not 
			     number of WC elements */);
	/* Request for more notifications. */
	ret = ibv_req_notify_cq(cq_ptr, 0);
	if (ret){
		/* ibv_req_notify_cq() reports the error code through its return
		 * value; fall back to errno if a provider returns a negative. */
		err = (ret > 0) ? ret : errno;
		rdma_error("Failed to request further notifications %d \n", -err);
		return -err;
	}
	/* We got notification. We reap the work completion (WC) elements. It is 
	 * unlikely, but it is good practice to write the CQ polling code so that 
	 * it can handle zero WCs: ibv_poll_cq can return zero. Same logic as 
	 * condition variables in pthread programming. */
	total_wc = 0;
	do {
		ret = ibv_poll_cq(cq_ptr /* the CQ, we got notification for */, 
				max_wc - total_wc /* number of remaining WC elements*/,
				wc + total_wc/* where to store */);
		if (ret < 0) {
			rdma_error("Failed to poll cq for wc due to %d \n", ret);
			/* ret is already negative here */
			return ret;
		}
		total_wc += ret;
	} while (total_wc < max_wc); 
	/* Now we check validity and status of I/O work completions */
	for( i = 0 ; i < total_wc ; i++) {
		if (wc[i].status != IBV_WC_SUCCESS) {
			rdma_error("Work completion (WC) has error status: %d (means: %s) at index %d\n", 
					-(int) wc[i].status, 
					ibv_wc_status_str(wc[i].status), 
					i);
			/* return negative value */
			return -(int) wc[i].status;
		}
	}
	debug("%d WC are completed \n", total_wc);
	return total_wc; 
}

/* Code acknowledgment: rping.c from librdmacm/examples */
/*
 * Resolve a host name or numeric address string into an IPv4 socket address.
 *
 * dst:  host name or IP address string, passed to getaddrinfo().
 * addr: destination buffer; exactly sizeof(struct sockaddr_in) bytes are
 *       written, so it must be at least that large.
 *
 * Returns 0 on success or the non-zero getaddrinfo() error code on failure.
 *
 * Resolution is restricted to AF_INET because only sizeof(struct
 * sockaddr_in) bytes are copied out. Without the hint, getaddrinfo() may
 * return an AF_INET6 entry first and the copy would yield a truncated,
 * unusable address.
 */
int get_addr(char *dst, struct sockaddr *addr)
{
	struct addrinfo hints;
	struct addrinfo *res = NULL;
	int ret = -1;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_INET;

	ret = getaddrinfo(dst, NULL, &hints, &res);
	if (ret) {
		rdma_error("getaddrinfo failed - invalid hostname or IP address\n");
		return ret;
	}
	/* The first result is used; the AF_INET hint guarantees its layout. */
	memcpy(addr, res->ai_addr, sizeof(struct sockaddr_in));
	freeaddrinfo(res);
	return ret;
}

/*
 * Invoke the external kernel_start() helper.
 * Takes no arguments and returns nothing; the work performed is defined by
 * that helper.
 */
void write_rdma_buffer()
{
	kernel_start();
	return;
}
/*
 * Invoke the external kernel_start_addr() helper.
 *
 * addr: forwarded unchanged; no validation is performed here.
 */
void write_rdma_buffer_addr(void *addr)
{
	kernel_start_addr(addr);
	return;
}