summaryrefslogtreecommitdiff
path: root/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
blob: 22752fe68e17c91a674ee6fcea0d21251b506f74 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/* -*- c++ -*- */
/*
 * Copyright 2007 Free Software Foundation, Inc.
 * 
 * This file is part of GNU Radio
 * 
 * GNU Radio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 * 
 * GNU Radio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "gc_jd_queue.h"
#include "mutex_lock.h"
#include "mutex_unlock.h"

extern int gc_sys_tag;

bool
gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
		    int jd_tag, gc_job_desc_t *item)
{
  gc_jd_queue_t	local_q;

  // Before aquiring the lock, see if it's possible that there's
  // something in the queue.  Checking in this way makes it easier
  // for the PPE to insert things, since we're not contending for
  // the lock unless there is something in the queue.

  // copy in the queue structure
  mfc_get(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0);
  mfc_write_tag_mask(1 << gc_sys_tag);	// the tag we're interested in
  mfc_read_tag_status_all();		// wait for DMA to complete

  if (local_q.head == 0){		// empty
    return false;
  }

  // When we peeked, head was non-zero.  Now grab the
  // lock and do it for real.

  _mutex_lock(q + offsetof(gc_jd_queue_t, mutex));

  // copy in the queue structure
  mfc_get(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0);
  mfc_write_tag_mask(1 << gc_sys_tag);	// the tag we're interested in
  mfc_read_tag_status_all();		// wait for DMA to complete

  if (local_q.head == 0){		// empty
    _mutex_unlock(q + offsetof(gc_jd_queue_t, mutex));
    return false;
  }

  // copy in job descriptor at head of queue
  *item_ea = local_q.head;
  
  // We must use the fence with the jd_tag to ensure that any
  // previously initiated put of a job desc is locally ordered before
  // the get of the new one.
  mfc_getf(item, local_q.head, sizeof(gc_job_desc_t), jd_tag, 0, 0);
  mfc_write_tag_mask(1 << jd_tag);	// the tag we're interested in
  mfc_read_tag_status_all();		// wait for DMA to complete

  local_q.head = item->sys.next;
  item->sys.next = 0;
  if (local_q.head == 0)		// now empty?
    local_q.tail = 0;


  // copy the queue structure back out
  mfc_put(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0);
  mfc_write_tag_mask(1 << gc_sys_tag);	// the tag we're interested in
  mfc_read_tag_status_all();		// wait for DMA to complete

  // Q: FIXME do we need to order stores in EA or can we just clear the
  // local copy of the mutex above and blast it out, removing the need
  // for this explicit unlock?
  //
  // A: Manual says it's better to use an atomic op rather than
  // a normal DMA, and that a putlluc is better than a putllc if
  // you can use it.

  _mutex_unlock(q + offsetof(gc_jd_queue_t, mutex));
  return true;
}


/*
 * Obtain a lock-line reservation that includes the tail of the
 * queue at EA \p q.
 *
 * The line's contents land in a scratch buffer on the stack and are
 * not used; the value read from the atomic status channel is
 * likewise discarded.
 */
void
gc_jd_queue_getllar(gc_eaddr_t q)
{
  // 256 bytes of scratch, from which a 128-byte aligned
  // (cache-aligned) buffer is carved.
  char scratch[256];
  char *line = (char *) ALIGN(scratch, 128);

  // EA of the queue's tail field, aligned with ALIGN128_EA to give
  // the EA passed to getllar.
  gc_eaddr_t	tail_ea = q + offsetof(gc_jd_queue_t, tail);
  gc_eaddr_t	line_ea = ALIGN128_EA(tail_ea);

  mfc_getllar(line, line_ea, 0, 0);
  spu_readch(MFC_RdAtomicStat);
}