Back to home page

LXR

 
 

    


File indexing completed on 2025-05-11 08:24:14

0001 /**
0002  * @file
0003  *
0004  * @ingroup rtems_bdbuf
0005  *
0006  * Block device buffer management.
0007  */
0008 
0009 /*
0010  * Disk I/O buffering
 * Buffer management
0012  *
0013  * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
0014  * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
0015  *         Victor V. Vengerov <vvv@oktet.ru>
0016  *         Alexander Kukuta <kam@oktet.ru>
0017  *
0018  * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
0019  *    Rewritten to remove score mutex access. Fixes many performance
0020  *    issues.
0021  *
0022  * Copyright (C) 2009, 2017 embedded brains GmbH & Co. KG
0023  */
0024 
0025 /**
0026  * Set to 1 to enable debug tracing.
0027  */
0028 #define RTEMS_BDBUF_TRACE 0
0029 
0030 #ifdef HAVE_CONFIG_H
0031 #include "config.h"
0032 #endif
0033 #include <limits.h>
0034 #include <errno.h>
0035 #include <stdio.h>
0036 #include <stdlib.h>
0037 #include <string.h>
0038 #include <inttypes.h>
0039 #include <pthread.h>
0040 
0041 #include <rtems.h>
0042 #include <rtems/thread.h>
0043 #include <rtems/score/assert.h>
0044 
0045 #include "rtems/bdbuf.h"
0046 
0047 #define BDBUF_INVALID_DEV NULL
0048 
0049 /*
0050  * Simpler label for this file.
0051  */
0052 #define bdbuf_config rtems_bdbuf_configuration
0053 
/**
 * Swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  rtems_disk_device    *dd;          /**< The device the transfer is for. */
  bool                  syncing;     /**< True if this transfer is part of a
                                      * sync operation. */
  rtems_blkdev_request  write_req;   /**< The write request. */
} rtems_bdbuf_swapout_transfer;
0065 
/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  bool                         enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
0080 
/**
 * Buffer waiters synchronization. The counter lets wakers skip the broadcast
 * when nobody is waiting (see rtems_bdbuf_wake()).
 */
typedef struct rtems_bdbuf_waiters {
  unsigned                 count;    /**< Number of tasks currently waiting. */
  rtems_condition_variable cond_var; /**< Broadcast to wake all waiters. */
} rtems_bdbuf_waiters;
0088 
/**
 * The BD buffer cache. All fields are protected by the cache lock unless
 * noted otherwise. There is a single instance (bdbuf_cache).
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  bool                swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_free_workers; /**< The work threads for the swapout
                                             * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_mutex         lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_mutex         sync_lock;         /**< Sync calls block writes. */
  bool                sync_active;       /**< True if a sync is active. */
  rtems_id            sync_requester;    /**< The sync requester. */
  rtems_disk_device  *sync_device;       /**< The device to sync, or
                                          * BDBUF_INVALID_DEV if this is not a
                                          * single-device sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  rtems_bdbuf_swapout_transfer *swapout_transfer; /**< Main swapout transfer. */
  rtems_bdbuf_swapout_worker *swapout_workers;    /**< Worker table. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */
  rtems_id            read_ahead_task;   /**< Read-ahead task */
  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
  bool                read_ahead_enabled; /**< Read-ahead enabled */
  rtems_status_code   init_status;       /**< The initialization status */
  pthread_once_t      once;              /**< Guards one-time initialization. */
} rtems_bdbuf_cache;
0145 
/**
 * Fatal error codes for the bdbuf subsystem. They are reported via
 * rtems_fatal() with source RTEMS_FATAL_SOURCE_BDBUF (see
 * rtems_bdbuf_fatal()). The STATE_* codes are combined with the offending
 * buffer state in the upper 16 bits by rtems_bdbuf_fatal_with_state().
 */
typedef enum {
  RTEMS_BDBUF_FATAL_CACHE_WAIT_2,
  RTEMS_BDBUF_FATAL_CACHE_WAIT_TO,
  RTEMS_BDBUF_FATAL_CACHE_WAKE,
  RTEMS_BDBUF_FATAL_PREEMPT_DIS,
  RTEMS_BDBUF_FATAL_PREEMPT_RST,
  RTEMS_BDBUF_FATAL_RA_WAKE_UP,
  RTEMS_BDBUF_FATAL_RECYCLE,
  RTEMS_BDBUF_FATAL_SO_WAKE_1,
  RTEMS_BDBUF_FATAL_SO_WAKE_2,
  RTEMS_BDBUF_FATAL_STATE_0,
  RTEMS_BDBUF_FATAL_STATE_2,
  RTEMS_BDBUF_FATAL_STATE_4,
  RTEMS_BDBUF_FATAL_STATE_5,
  RTEMS_BDBUF_FATAL_STATE_6,
  RTEMS_BDBUF_FATAL_STATE_7,
  RTEMS_BDBUF_FATAL_STATE_8,
  RTEMS_BDBUF_FATAL_STATE_9,
  RTEMS_BDBUF_FATAL_STATE_10,
  RTEMS_BDBUF_FATAL_STATE_11,
  RTEMS_BDBUF_FATAL_SWAPOUT_RE,
  RTEMS_BDBUF_FATAL_TREE_RM,
  RTEMS_BDBUF_FATAL_WAIT_EVNT,
  RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT
} rtems_bdbuf_fatal_code;
0171 
0172 /**
0173  * The events used in this code. These should be system events rather than
0174  * application events.
0175  */
0176 #define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
0177 #define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
0178 
0179 static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
0180 
0181 static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
0182 
/**
 * The Buffer Descriptor cache. Statically initialized so the locks and
 * condition variables are usable before rtems_bdbuf_init() runs; the
 * remaining fields are set up once via the pthread_once guard.
 */
static rtems_bdbuf_cache bdbuf_cache = {
  .lock = RTEMS_MUTEX_INITIALIZER(NULL),
  .sync_lock = RTEMS_MUTEX_INITIALIZER(NULL),
  .access_waiters = { .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL) },
  .transfer_waiters = {
    .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL)
  },
  .buffer_waiters = { .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL) },
  .once = PTHREAD_ONCE_INIT
};
0196 
0197 #if RTEMS_BDBUF_TRACE
0198 /**
0199  * If true output the trace message.
0200  */
0201 bool rtems_bdbuf_tracer;
0202 
0203 /**
0204  * Return the number of items on the list.
0205  *
0206  * @param list The chain control.
0207  * @return uint32_t The number of items on the list.
0208  */
0209 uint32_t
0210 rtems_bdbuf_list_count (rtems_chain_control* list)
0211 {
0212   rtems_chain_node* node = rtems_chain_first (list);
0213   uint32_t          count = 0;
0214   while (!rtems_chain_is_tail (list, node))
0215   {
0216     count++;
0217     node = rtems_chain_next (node);
0218   }
0219   return count;
0220 }
0221 
0222 /**
0223  * Show the usage for the bdbuf cache.
0224  */
0225 void
0226 rtems_bdbuf_show_usage (void)
0227 {
0228   uint32_t group;
0229   uint32_t total = 0;
0230   uint32_t val;
0231 
0232   for (group = 0; group < bdbuf_cache.group_count; group++)
0233     total += bdbuf_cache.groups[group].users;
0234   printf ("bdbuf:group users=%lu", total);
0235   val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
0236   printf (", lru=%lu", val);
0237   total = val;
0238   val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
0239   printf (", mod=%lu", val);
0240   total += val;
0241   val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
0242   printf (", sync=%lu", val);
0243   total += val;
0244   printf (", total=%lu\n", total);
0245 }
0246 
0247 /**
0248  * Show the users for a group of a bd.
0249  *
0250  * @param where A label to show the context of output.
0251  * @param bd The bd to show the users of.
0252  */
0253 void
0254 rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
0255 {
0256   const char* states[] =
0257     { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };
0258 
0259   printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
0260           where,
0261           bd->block, states[bd->state],
0262           bd->group - bdbuf_cache.groups,
0263           bd - bdbuf_cache.bds,
0264           bd->group->users,
0265           bd->group->users > 8 ? "<<<<<<<" : "");
0266 }
0267 #else
0268 #define rtems_bdbuf_tracer (0)
0269 #define rtems_bdbuf_show_usage() ((void) 0)
0270 #define rtems_bdbuf_show_users(_w, _b) ((void) 0)
0271 #endif
0272 
0273 /**
0274  * The default maximum height of 32 allows for AVL trees having between
0275  * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
0276  * change this compile-time constant as you wish.
0277  */
0278 #ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
0279 #define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
0280 #endif
0281 
/**
 * Raise a fatal bdbuf error via rtems_fatal() with the BDBUF fatal source.
 * Does not return.
 */
static void
rtems_bdbuf_fatal (rtems_fatal_code error)
{
  rtems_fatal (RTEMS_FATAL_SOURCE_BDBUF, error);
}
0287 
0288 static void
0289 rtems_bdbuf_fatal_with_state (rtems_bdbuf_buf_state state,
0290                               rtems_bdbuf_fatal_code error)
0291 {
0292   rtems_bdbuf_fatal ((((uint32_t) state) << 16) | error);
0293 }
0294 
0295 /**
0296  * Searches for the node with specified dd/block.
0297  *
0298  * @param root pointer to the root node of the AVL-Tree
0299  * @param dd disk device search key
0300  * @param block block search key
0301  * @retval NULL node with the specified dd/block is not found
0302  * @return pointer to the node with specified dd/block
0303  */
0304 static rtems_bdbuf_buffer *
0305 rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
0306                         const rtems_disk_device *dd,
0307                         rtems_blkdev_bnum    block)
0308 {
0309   rtems_bdbuf_buffer* p = *root;
0310 
0311   while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
0312   {
0313     if (((uintptr_t) p->dd < (uintptr_t) dd)
0314         || ((p->dd == dd) && (p->block < block)))
0315     {
0316       p = p->avl.right;
0317     }
0318     else
0319     {
0320       p = p->avl.left;
0321     }
0322   }
0323 
0324   return p;
0325 }
0326 
/**
 * Inserts the specified node to the AVL tree.
 *
 * The descent path is recorded in buf_stack; each visited node's avl.cache
 * field remembers the direction taken (1 = right, -1 = left) so the
 * rebalancing pass can re-link parents without parent pointers.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occurred (a node with this dd/block already exists)
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  if (p == NULL)
  {
    /* Empty tree: the new node becomes the root. */
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /* Descend to the insertion point, recording the path and directions. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): the first assignment is redundant; the next line
         * assigns q = node again as part of linking the child. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate key: refuse the insert. */
      return -1;
    }

    p = q;
  }

  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /* Walk back up the recorded path, updating balance factors and rotating
   * where a subtree has become too deep. The loop stops as soon as a
   * subtree's height is unchanged (modified == false). */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      /* The new node went into p's left subtree. */
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      /* The new node went into p's right subtree. */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Re-link the (possibly rotated) subtree root into its parent, or make
     * it the tree root when the path stack is exhausted. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
0507 
0508 
/**
 * Removes the node from the tree.
 *
 * As in rtems_bdbuf_avl_insert(), the descent path is recorded in buf_stack
 * and each node's avl.cache field stores the direction taken (1 = right,
 * -1 = left) so the rebalancing pass can re-link parents.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Descend to the node to delete, recording the path and directions. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right child: splice in the left child (if any). */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: it replaces q directly. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /* Find q's in-order successor s (leftmost node of the right
       * subtree) and substitute it for q. t remembers the stack slot
       * where q sat so s can be written there afterwards. */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Link the replacement node into q's former parent (or the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /* Walk back up the recorded path, rebalancing shortened subtrees until
   * a subtree's height is unchanged (modified == false). */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Re-link the (possibly rotated) subtree root into its parent, or make
     * it the tree root when the path stack is exhausted. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
0783 
/**
 * Set the state of a buffer descriptor.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
0789 
0790 static rtems_blkdev_bnum
0791 rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
0792 {
0793   if (dd->block_to_media_block_shift >= 0)
0794     return block << dd->block_to_media_block_shift;
0795   else
0796     /*
0797      * Change the block number for the block size to the block number for the media
0798      * block size. We have to use 64bit maths. There is no short cut here.
0799      */
0800     return (rtems_blkdev_bnum)
0801       ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
0802 }
0803 
/**
 * Lock the mutex. A single task can nest calls.
 *
 * @param lock The mutex to lock.
 */
static void
rtems_bdbuf_lock (rtems_mutex *lock)
{
  rtems_mutex_lock (lock);
}
0814 
/**
 * Unlock the mutex.
 *
 * @param lock The mutex to unlock.
 */
static void
rtems_bdbuf_unlock (rtems_mutex *lock)
{
  rtems_mutex_unlock (lock);
}
0825 
/**
 * Lock the cache. A single task can nest calls.
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (&bdbuf_cache.lock);
}
0834 
/**
 * Unlock the cache.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (&bdbuf_cache.lock);
}
0843 
/**
 * Lock the cache's sync lock. A single task can nest calls.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (&bdbuf_cache.sync_lock);
}
0852 
/**
 * Unlock the cache's sync lock, allowing blocked writers to proceed.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (&bdbuf_cache.sync_lock);
}
0861 
/**
 * Take a user reference on the buffer's group.
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}
0867 
/**
 * Drop a user reference on the buffer's group.
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
0873 
/**
 * Wait until woken. A condition variable is used so a number of tasks can
 * wait and all can be woken at once with a broadcast. Task events would
 * require we maintain a list of tasks to be woken and this would require
 * storage and we do not know the number of tasks that could be waiting.
 *
 * The condition variable wait atomically releases the cache lock and blocks,
 * and reacquires the lock before returning, so no wakeup between unlocking
 * and blocking can be missed.
 *
 * A counter is used to save the wake call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  rtems_condition_variable_wait (&waiters->cond_var, &bdbuf_cache.lock);

  --waiters->count;
}
0901 
/**
 * Wait on the given waiter group for the buffer. The buffer's waiter count
 * and its group's user count are held across the wait so the buffer and
 * group cannot be recycled while we block.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
0911 
0912 /**
0913  * Wake a blocked resource. The resource has a counter that lets us know if
0914  * there are any waiters.
0915  */
0916 static void
0917 rtems_bdbuf_wake (rtems_bdbuf_waiters *waiters)
0918 {
0919   if (waiters->count > 0)
0920   {
0921     rtems_condition_variable_broadcast (&waiters->cond_var);
0922   }
0923 }
0924 
/**
 * Wake the swapout task by sending it the swapout sync event. Failure to
 * deliver the event is fatal.
 */
static void
rtems_bdbuf_wake_swapper (void)
{
  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
                                           RTEMS_BDBUF_SWAPOUT_SYNC);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_1);
}
0933 
0934 static bool
0935 rtems_bdbuf_has_buffer_waiters (void)
0936 {
0937   return bdbuf_cache.buffer_waiters.count;
0938 }
0939 
/**
 * Remove the buffer from the AVL tree. A failed removal means the cache
 * bookkeeping is corrupt and is fatal.
 */
static void
rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
{
  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_TREE_RM);
}
0946 
/**
 * Remove the buffer from the LRU list and, if it is CACHED, from the AVL
 * tree as well. FREE buffers are not in the tree; any other state here is
 * fatal.
 */
static void
rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
{
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_FREE:
      break;
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_remove_from_tree (bd);
      break;
    default:
      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_10);
  }

  rtems_chain_extract_unprotected (&bd->link);
}
0963 
/**
 * Mark the buffer FREE and prepend it to the LRU list so it is reused
 * first.
 */
static void
rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
}
0970 
/**
 * Mark the buffer EMPTY (no valid data).
 */
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}
0976 
/**
 * Mark the buffer CACHED and append it to the LRU list so it is reused
 * last.
 */
static void
rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
}
0983 
0984 static void
0985 rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
0986 {
0987   rtems_bdbuf_make_empty (bd);
0988 
0989   if (bd->waiters == 0)
0990   {
0991     rtems_bdbuf_remove_from_tree (bd);
0992     rtems_bdbuf_make_free_and_add_to_lru_list (bd);
0993   }
0994 }
0995 
/**
 * Release a buffer that was accessed for writing onto the modified list.
 *
 * If a sync is active for this buffer's device, the release first blocks on
 * the sync lock (dropping and retaking the cache lock around it) so writes
 * do not race with the sync.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and let the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);

  /* Wake tasks waiting on this buffer first; otherwise, if buffers are
   * scarce, kick the swapper to free some up. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1035 
/**
 * Release an accessed buffer back to the LRU list as CACHED, dropping the
 * group reference taken for the access, then wake either tasks waiting on
 * this buffer or tasks waiting for any buffer.
 */
static void
rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_group_release (bd);
  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
}
1047 
1048 /**
1049  * Compute the number of BDs per group for a given buffer size.
1050  *
1051  * @param size The buffer size. It can be any size and we scale up.
1052  */
1053 static size_t
1054 rtems_bdbuf_bds_per_group (size_t size)
1055 {
1056   size_t bufs_per_size;
1057   size_t bds_per_size;
1058 
1059   if (size > bdbuf_config.buffer_max)
1060     return 0;
1061 
1062   bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1063 
1064   for (bds_per_size = 1;
1065        bds_per_size < bufs_per_size;
1066        bds_per_size <<= 1)
1067     ;
1068 
1069   return bdbuf_cache.max_bds_per_group / bds_per_size;
1070 }
1071 
/**
 * Discard an accessed buffer, dropping the group reference taken for the
 * access, then wake either tasks waiting on this buffer or tasks waiting
 * for any buffer.
 */
static void
rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_group_release (bd);
  rtems_bdbuf_discard_buffer (bd);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
}
1083 
1084 /**
1085  * Reallocate a group. The BDs currently allocated in the group are removed
1086  * from the ALV tree and any lists then the new BD's are prepended to the ready
1087  * list of the cache.
1088  *
1089  * @param group The group to reallocate.
1090  * @param new_bds_per_group The new count of BDs per group.
1091  * @return A buffer of this group.
1092  */
1093 static rtems_bdbuf_buffer *
1094 rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1095 {
1096   rtems_bdbuf_buffer* bd;
1097   size_t              b;
1098   size_t              bufs_per_bd;
1099 
1100   if (rtems_bdbuf_tracer)
1101     printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1102             group - bdbuf_cache.groups, group->bds_per_group,
1103             new_bds_per_group);
1104 
1105   bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1106 
1107   for (b = 0, bd = group->bdbuf;
1108        b < group->bds_per_group;
1109        b++, bd += bufs_per_bd)
1110     rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1111 
1112   group->bds_per_group = new_bds_per_group;
1113   bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1114 
1115   for (b = 1, bd = group->bdbuf + bufs_per_bd;
1116        b < group->bds_per_group;
1117        b++, bd += bufs_per_bd)
1118     rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1119 
1120   if (b > 1)
1121     rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1122 
1123   return group->bdbuf;
1124 }
1125 
1126 static void
1127 rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1128                                 rtems_disk_device  *dd,
1129                                 rtems_blkdev_bnum   block)
1130 {
1131   bd->dd        = dd ;
1132   bd->block     = block;
1133   bd->avl.left  = NULL;
1134   bd->avl.right = NULL;
1135   bd->waiters   = 0;
1136 
1137   if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1138     rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RECYCLE);
1139 
1140   rtems_bdbuf_make_empty (bd);
1141 }
1142 
/**
 * Search the LRU list for a BD that can be recycled for the given device and
 * block.  The unprotected list operations require the caller to hold the
 * cache lock.
 *
 * A BD is only recycled when nobody waits on it.  If its group already has
 * the matching bds_per_group layout it is taken directly; otherwise the
 * group is reallocated to the device's layout, which is only possible when
 * the group has no users.
 *
 * @param dd The disk device the recycled buffer is mapped to.
 * @param block The media block the recycled buffer is mapped to.
 * @return The recycled BD (inserted into the AVL tree and set up empty), or
 *         NULL if no BD on the LRU list could be recycled.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
                                      rtems_blkdev_bnum  block)
{
  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);

  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
  {
    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
    rtems_bdbuf_buffer *empty_bd = NULL;

    if (rtems_bdbuf_tracer)
      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
              bd - bdbuf_cache.bds,
              bd->group - bdbuf_cache.groups, bd->group->users,
              bd->group->bds_per_group, dd->bds_per_group);

    /*
     * If nobody waits for this BD, we may recycle it.
     */
    if (bd->waiters == 0)
    {
      if (bd->group->bds_per_group == dd->bds_per_group)
      {
        /* The group layout already matches the device: take this BD. */
        rtems_bdbuf_remove_from_tree_and_lru_list (bd);

        empty_bd = bd;
      }
      else if (bd->group->users == 0)
        /* Layout differs but the group is idle: reshape it to fit. */
        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
    }

    if (empty_bd != NULL)
    {
      /* Map the recycled BD to the requested device and block. */
      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);

      return empty_bd;
    }

    node = rtems_chain_next (node);
  }

  return NULL;
}
1187 
1188 static rtems_status_code
1189 rtems_bdbuf_create_task(
1190   rtems_name name,
1191   rtems_task_priority priority,
1192   rtems_task_priority default_priority,
1193   rtems_id *id
1194 )
1195 {
1196   rtems_status_code sc;
1197   size_t stack_size = bdbuf_config.task_stack_size ?
1198     bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1199 
1200   priority = priority != 0 ? priority : default_priority;
1201 
1202   sc = rtems_task_create (name,
1203                           priority,
1204                           stack_size,
1205                           RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1206                           RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1207                           id);
1208 
1209   return sc;
1210 }
1211 
1212 static rtems_bdbuf_swapout_transfer*
1213 rtems_bdbuf_swapout_transfer_alloc (void)
1214 {
1215   /*
1216    * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
1217    * I am disappointment at finding code like this in RTEMS. The request should
1218    * have been a rtems_chain_control. Simple, fast and less storage as the node
1219    * is already part of the buffer structure.
1220    */
1221   size_t transfer_size = sizeof (rtems_bdbuf_swapout_transfer)
1222     + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1223   return calloc (1, transfer_size);
1224 }
1225 
1226 static void
1227 rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status);
1228 
1229 static void
1230 rtems_bdbuf_swapout_transfer_init (rtems_bdbuf_swapout_transfer* transfer,
1231                                    rtems_id id)
1232 {
1233   rtems_chain_initialize_empty (&transfer->bds);
1234   transfer->dd = BDBUF_INVALID_DEV;
1235   transfer->syncing = false;
1236   transfer->write_req.req = RTEMS_BLKDEV_REQ_WRITE;
1237   transfer->write_req.done = rtems_bdbuf_transfer_done;
1238   transfer->write_req.io_task = id;
1239 }
1240 
1241 static size_t
1242 rtems_bdbuf_swapout_worker_size (void)
1243 {
1244   return sizeof (rtems_bdbuf_swapout_worker)
1245     + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1246 }
1247 
1248 static rtems_task
1249 rtems_bdbuf_swapout_worker_task (rtems_task_argument arg);
1250 
/**
 * Create and start the configured number of swapout worker tasks.  The
 * workers are allocated as one contiguous zero-initialized array; each
 * element carries its transfer storage at the end (see
 * rtems_bdbuf_swapout_worker_size()).
 *
 * @return RTEMS_SUCCESSFUL, RTEMS_NO_MEMORY if the worker array could not be
 *         allocated, or the first failing task create/start status.
 */
static rtems_status_code
rtems_bdbuf_swapout_workers_create (void)
{
  rtems_status_code  sc;
  size_t             w;
  size_t             worker_size;
  char              *worker_current;

  worker_size = rtems_bdbuf_swapout_worker_size ();
  worker_current = calloc (1, bdbuf_config.swapout_workers * worker_size);
  sc = worker_current != NULL ? RTEMS_SUCCESSFUL : RTEMS_NO_MEMORY;

  bdbuf_cache.swapout_workers = (rtems_bdbuf_swapout_worker *) worker_current;

  /* Stop at the first failure; on allocation failure the loop is skipped. */
  for (w = 0;
       sc == RTEMS_SUCCESSFUL && w < bdbuf_config.swapout_workers;
       w++, worker_current += worker_size)
  {
    rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

    /* Task names are 'BDoa', 'BDob', ... indexed by worker number. */
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
                                  bdbuf_config.swapout_worker_priority,
                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
                                  &worker->id);
    if (sc == RTEMS_SUCCESSFUL)
    {
      rtems_bdbuf_swapout_transfer_init (&worker->transfer, worker->id);

      rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
      worker->enabled = true;

      sc = rtems_task_start (worker->id,
                             rtems_bdbuf_swapout_worker_task,
                             (rtems_task_argument) worker);
    }
  }

  return sc;
}
1290 
1291 static size_t
1292 rtems_bdbuf_read_request_size (uint32_t transfer_count)
1293 {
1294   return sizeof (rtems_blkdev_request)
1295     + sizeof (rtems_blkdev_sg_buffer) * transfer_count;
1296 }
1297 
/**
 * Initialize the cache: validate the configuration, allocate the BD, group
 * and buffer arrays, and create and start the swapout, swapout worker and
 * read ahead tasks.  Runs exactly once via pthread_once() from
 * rtems_bdbuf_init().
 *
 * @return RTEMS_SUCCESSFUL, RTEMS_CALLED_FROM_ISR if invoked from interrupt
 *         context, RTEMS_INVALID_NUMBER for bad configuration values, or
 *         RTEMS_UNSATISFIED if any allocation or task create/start failed.
 */
static rtems_status_code
rtems_bdbuf_do_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  rtems_status_code   sc;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */

  /* The maximum buffer size must be a multiple of the minimum size. */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /*
   * The read request is stack allocated (see bdbuf_alloc in
   * rtems_bdbuf_execute_read_request()), so bound its maximum size here.
   */
  if (rtems_bdbuf_read_request_size (bdbuf_config.max_read_ahead_blocks)
      > RTEMS_MINIMUM_STACK_SIZE / 8U)
    return RTEMS_INVALID_NUMBER;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_free_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);
  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);

  rtems_mutex_set_name (&bdbuf_cache.lock, "bdbuf lock");
  rtems_mutex_set_name (&bdbuf_cache.sync_lock, "bdbuf sync lock");
  rtems_condition_variable_set_name (&bdbuf_cache.access_waiters.cond_var,
                                     "bdbuf access");
  rtems_condition_variable_set_name (&bdbuf_cache.transfer_waiters.cond_var,
                                     "bdbuf transfer");
  rtems_condition_variable_set_name (&bdbuf_cache.buffer_waiters.cond_var,
                                     "bdbuf buffer");

  rtems_bdbuf_lock_cache ();

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the group descriptors.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by
   * rtems_cache_aligned_malloc() with free().
   */
  bdbuf_cache.buffers = rtems_cache_aligned_malloc(bdbuf_cache.buffer_min_count
                                                   * bdbuf_config.buffer_min);
  if (bdbuf_cache.buffers == NULL)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dd    = BDBUF_INVALID_DEV;
    bd->group  = group;
    bd->buffer = buffer;

    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);

    /* Advance to the next group after every max_bds_per_group BDs. */
    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  /* Each group starts at the maximum layout and owns a slice of the BDs. */
  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task.
   */

  bdbuf_cache.swapout_transfer = rtems_bdbuf_swapout_transfer_alloc ();
  if (!bdbuf_cache.swapout_transfer)
    goto error;

  bdbuf_cache.swapout_enabled = true;

  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
                                bdbuf_config.swapout_priority,
                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
                                &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_swapout_transfer_init (bdbuf_cache.swapout_transfer, bdbuf_cache.swapout);

  sc = rtems_task_start (bdbuf_cache.swapout,
                         rtems_bdbuf_swapout_task,
                         (rtems_task_argument) bdbuf_cache.swapout_transfer);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  if (bdbuf_config.swapout_workers > 0)
  {
    sc = rtems_bdbuf_swapout_workers_create ();
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  if (bdbuf_config.max_read_ahead_blocks > 0)
  {
    bdbuf_cache.read_ahead_enabled = true;
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
                                  bdbuf_config.read_ahead_priority,
                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
                                  &bdbuf_cache.read_ahead_task);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;

    sc = rtems_task_start (bdbuf_cache.read_ahead_task,
                           rtems_bdbuf_read_ahead_task,
                           0);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  /*
   * Best effort clean up: delete any tasks created so far and free all
   * allocations.  Passing NULL to free() is harmless.
   */
  if (bdbuf_cache.read_ahead_task != 0)
    rtems_task_delete (bdbuf_cache.read_ahead_task);

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  if (bdbuf_cache.swapout_workers)
  {
    char   *worker_current = (char *) bdbuf_cache.swapout_workers;
    size_t  worker_size = rtems_bdbuf_swapout_worker_size ();
    size_t  w;

    for (w = 0;
         w < bdbuf_config.swapout_workers;
         w++, worker_current += worker_size)
    {
      rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

      if (worker->id != 0) {
        rtems_task_delete (worker->id);
      }
    }
  }

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);
  free (bdbuf_cache.swapout_transfer);
  free (bdbuf_cache.swapout_workers);

  rtems_bdbuf_unlock_cache ();

  return RTEMS_UNSATISFIED;
}
1500 
static void
rtems_bdbuf_init_once (void)
{
  /* pthread_once() body: record the result for rtems_bdbuf_init() callers. */
  bdbuf_cache.init_status = rtems_bdbuf_do_init();
}
1506 
1507 rtems_status_code
1508 rtems_bdbuf_init (void)
1509 {
1510   int eno;
1511 
1512   eno = pthread_once (&bdbuf_cache.once, rtems_bdbuf_init_once);
1513   _Assert (eno == 0);
1514   (void) eno;
1515 
1516   return bdbuf_cache.init_status;
1517 }
1518 
1519 static void
1520 rtems_bdbuf_wait_for_event (rtems_event_set event)
1521 {
1522   rtems_status_code sc = RTEMS_SUCCESSFUL;
1523   rtems_event_set   out = 0;
1524 
1525   sc = rtems_event_receive (event,
1526                             RTEMS_EVENT_ALL | RTEMS_WAIT,
1527                             RTEMS_NO_TIMEOUT,
1528                             &out);
1529 
1530   if (sc != RTEMS_SUCCESSFUL || out != event)
1531     rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_EVNT);
1532 }
1533 
1534 static void
1535 rtems_bdbuf_wait_for_transient_event (void)
1536 {
1537   rtems_status_code sc = RTEMS_SUCCESSFUL;
1538 
1539   sc = rtems_event_transient_receive (RTEMS_WAIT, RTEMS_NO_TIMEOUT);
1540   if (sc != RTEMS_SUCCESSFUL)
1541     rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT);
1542 }
1543 
/**
 * Wait until the BD is ready for access by the current task.  On return the
 * BD is in the EMPTY state, or it was CACHED or MODIFIED and has been
 * extracted from its list (a MODIFIED BD additionally has its group
 * released).  The unprotected chain operations require the caller to hold
 * the cache lock.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_chain_extract_unprotected (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another task currently accesses this BD. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* The BD takes part in a sync or device transfer. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_7);
    }
  }
}
1575 
/**
 * Move a modified BD to the sync list and wake the swapout task so it gets
 * written out.  The unprotected chain operations require the caller to hold
 * the cache lock.
 */
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract_unprotected (&bd->link);
  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1584 
/**
 * @brief Waits until the buffer is ready for recycling.
 *
 * A MODIFIED buffer is first queued for synchronization; a CACHED or EMPTY
 * buffer with waiters triggers an anonymous wait so the caller searches
 * again.  The unprotected operations require the cache lock to be held.
 *
 * @retval @c true Buffer is valid and may be recycled.
 * @retval @c false Buffer is invalid and has to be searched again.
 */
static bool
rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_FREE:
        return true;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Let the swapout task write it out before it can be recycled. */
        rtems_bdbuf_request_sync_for_modified_buffer (bd);
        break;
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
        if (bd->waiters == 0)
          return true;
        else
        {
          /*
           * It is essential that we wait here without a special wait count and
           * without the group in use.  Otherwise we could trigger a wait ping
           * pong with another recycle waiter.  The state of the buffer is
           * arbitrary afterwards.
           */
          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
          return false;
        }
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another task currently accesses this BD. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* The BD takes part in a sync or device transfer. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_8);
    }
  }
}
1634 
/**
 * Wait until a sync or transfer of the BD has finished; returns as soon as
 * the BD is in any non-transfer state.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_9);
    }
  }
}
1660 
/**
 * Wait for any buffer to become available.  If modified buffers exist the
 * swapout task is woken first, since writing them out frees BDs.
 */
static void
rtems_bdbuf_wait_for_buffer (void)
{
  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    rtems_bdbuf_wake_swapper ();

  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
}
1669 
/**
 * Queue the BD for synchronization, wake the swapout task, and wait until
 * the write out has completed.  The unprotected chain operations require the
 * caller to hold the cache lock.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      /* An empty buffer is of no further use here: free it for recycling. */
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1698 
1699 static rtems_bdbuf_buffer *
1700 rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1701                                        rtems_blkdev_bnum  block)
1702 {
1703   rtems_bdbuf_buffer *bd = NULL;
1704 
1705   bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1706 
1707   if (bd == NULL)
1708   {
1709     bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1710 
1711     if (bd != NULL)
1712       rtems_bdbuf_group_obtain (bd);
1713   }
1714   else
1715     /*
1716      * The buffer is in the cache.  So it is already available or in use, and
1717      * thus no need for a read ahead.
1718      */
1719     bd = NULL;
1720 
1721   return bd;
1722 }
1723 
/**
 * Get the BD for the media block, blocking until one is available.
 *
 * Loops until a usable BD exists: a BD found in the AVL tree whose group
 * layout does not match the device is recycled (after waiting) and the
 * search restarts; when no BD is in the tree one is recycled from the LRU
 * list, and if that fails the task waits for a buffer to be released.  The
 * unprotected operations require the caller to hold the cache lock.
 *
 * @return The BD, ready for access by this task; its group has been
 *         obtained.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
                                   rtems_blkdev_bnum  block)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != dd->bds_per_group)
      {
        /* Wrong group layout for this device: recycle the BD and retry. */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1762 
1763 static rtems_status_code
1764 rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1765                              rtems_blkdev_bnum        block,
1766                              rtems_blkdev_bnum       *media_block_ptr)
1767 {
1768   rtems_status_code sc = RTEMS_SUCCESSFUL;
1769 
1770   if (block < dd->block_count)
1771   {
1772     /*
1773      * Compute the media block number. Drivers work with media block number not
1774      * the block number a BD may have as this depends on the block size set by
1775      * the user.
1776      */
1777     *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
1778   }
1779   else
1780   {
1781     sc = RTEMS_INVALID_ID;
1782   }
1783 
1784   return sc;
1785 }
1786 
/**
 * Get a buffer for the block of the disk device (public interface declared
 * in <rtems/bdbuf.h>).  On success *bd_ptr refers to a BD in one of the
 * ACCESS states; on failure it is set to NULL.
 */
rtems_status_code
rtems_bdbuf_get (rtems_disk_device   *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block;

  rtems_bdbuf_lock_cache ();

  /* Translate to a media block; fails for out-of-range block numbers. */
  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    /*
     * Print the block index relative to the physical disk.
     */
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);

    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /*
         * To get a modified buffer could be considered a bug in the caller
         * because you should not be getting an already modified buffer but
         * user may have modified a byte in a block then decided to seek the
         * start and write the whole block and the file system will have no
         * record of this so just gets the block to fill.
         */
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_2);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("get", bd);
      rtems_bdbuf_show_usage ();
    }
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
1846 
/**
 * Call back handler called by the low level driver when the transfer has
 * completed. This function may be invoked from interrupt handler.
 *
 * @param req Pointer to the block device request that has completed.
 * @param status I/O completion status
 */
static void
rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status)
{
  /* Record the outcome and wake the task blocked in
     rtems_bdbuf_wait_for_transient_event(). */
  req->status = status;

  rtems_event_transient_send (req->io_task);
}
1863 
/**
 * Execute a prepared transfer request and wait for its completion, then
 * release every BD of the request: on success, BDs still in TRANSFER state
 * become cached and return to the LRU list; all others are discarded.
 * Transfer or buffer waiters are woken as needed.
 *
 * @param dd The disk device.
 * @param req The read or write request to execute.
 * @param cache_locked True if the caller holds the cache lock.  The lock
 *        state on return is the same as on entry.
 * @return RTEMS_SUCCESSFUL or RTEMS_UNSATISFIED as reported by the driver,
 *         otherwise RTEMS_IO_ERROR.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
                                      rtems_blkdev_request *req,
                                      bool                  cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  /* Do not hold the cache lock across the device operation. */
  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* The return value will be ignored for transfer requests */
  dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  /* Wait for transfer request completion */
  rtems_bdbuf_wait_for_transient_event ();
  sc = req->status;

  rtems_bdbuf_lock_cache ();

  /* Statistics */
  if (req->req == RTEMS_BLKDEV_REQ_READ)
  {
    dd->stats.read_blocks += req->bufnum;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.read_errors;
  }
  else
  {
    dd->stats.write_blocks += req->bufnum;
    ++dd->stats.write_transfers;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.write_errors;
  }

  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    /* A state other than TRANSFER (e.g. TRANSFER_PURGED) means the buffer
       was purged during the transfer and cannot be trusted. */
    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
1936 
/**
 * Read the block of the BD from the device and read ahead up to
 * transfer_count - 1 subsequent blocks into freshly recycled BDs.  The read
 * ahead stops early when no suitable BD can be obtained.  The request lives
 * on the stack; its maximum size is bounded by the configuration check in
 * rtems_bdbuf_do_init().
 */
static rtems_status_code
rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
                                  rtems_bdbuf_buffer *bd,
                                  uint32_t            transfer_count)
{
  rtems_blkdev_request *req = NULL;
  rtems_blkdev_bnum media_block = bd->block;
  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
  uint32_t block_size = dd->block_size;
  uint32_t transfer_index = 1;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  req = bdbuf_alloc (rtems_bdbuf_read_request_size (transfer_count));

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->done = rtems_bdbuf_transfer_done;
  req->io_task = rtems_task_self ();
  req->bufnum = 0;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

  /* The first scatter/gather entry is the requested block itself. */
  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  /* Append read ahead blocks while fresh BDs are available. */
  while (transfer_index < transfer_count)
  {
    media_block += media_blocks_per_block;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;

  return rtems_bdbuf_execute_transfer_request (dd, req, true);
}
1996 
/**
 * Check whether a read ahead request for the disk device is currently
 * queued, i.e. its read ahead node is on the read ahead chain.
 */
static bool
rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
{
  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
}
2002 
/**
 * Remove a pending read ahead request of the disk device, if any, and mark
 * its node as off chain.
 */
static void
rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
{
  if (rtems_bdbuf_is_read_ahead_active (dd))
  {
    rtems_chain_extract_unprotected (&dd->read_ahead.node);
    rtems_chain_set_off_chain (&dd->read_ahead.node);
  }
}
2012 
/**
 * Cancel any pending read ahead request of the disk device and clear its
 * trigger block.
 */
static void
rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
{
  rtems_bdbuf_read_ahead_cancel (dd);
  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
}
2019 
/**
 * Queue the disk device on the read ahead chain.  The read ahead task is
 * only woken when the chain was empty; otherwise it already has work
 * pending.  Failure to send the wake-up event is fatal.
 */
static void
rtems_bdbuf_read_ahead_add_to_chain (rtems_disk_device *dd)
{
  rtems_status_code sc;
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  if (rtems_chain_is_empty (chain))
  {
    sc = rtems_event_send (bdbuf_cache.read_ahead_task,
                           RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    if (sc != RTEMS_SUCCESSFUL)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RA_WAKE_UP);
  }

  rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
}
2036 
2037 static void
2038 rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
2039                                       rtems_blkdev_bnum  block)
2040 {
2041   if (bdbuf_cache.read_ahead_task != 0
2042       && dd->read_ahead.trigger == block
2043       && !rtems_bdbuf_is_read_ahead_active (dd))
2044   {
2045     dd->read_ahead.nr_blocks = RTEMS_DISK_READ_AHEAD_SIZE_AUTO;
2046     rtems_bdbuf_read_ahead_add_to_chain(dd);
2047   }
2048 }
2049 
2050 static void
2051 rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2052                                     rtems_blkdev_bnum  block)
2053 {
2054   if (dd->read_ahead.trigger != block)
2055   {
2056     rtems_bdbuf_read_ahead_cancel (dd);
2057     dd->read_ahead.trigger = block + 1;
2058     dd->read_ahead.next = block + 2;
2059   }
2060 }
2061 
/**
 * Get the buffer for @a block of @a dd for read access.  On a cache miss
 * (buffer state EMPTY) the block is read synchronously from the media before
 * the buffer is handed out, and the read-ahead trigger is armed so a
 * following sequential access can be served ahead of time.  The cache lock
 * is held for the duration of the call.
 */
rtems_status_code
rtems_bdbuf_read (rtems_disk_device   *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block;

  rtems_bdbuf_lock_cache ();

  /* Translate the logical block into a media block; this fails for blocks
   * outside the device. */
  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        /* Hit: the buffer already holds valid data. */
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Hit on a dirty buffer: keep its modified status across the
         * access. */
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        /* Miss: fetch the data from the media and arm read-ahead for the
         * next sequential access. */
        ++dd->stats.read_misses;
        rtems_bdbuf_set_read_ahead_trigger (dd, block);
        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
        if (sc == RTEMS_SUCCESSFUL)
        {
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
          rtems_chain_extract_unprotected (&bd->link);
          rtems_bdbuf_group_obtain (bd);
        }
        else
        {
          /* The transfer failed; the buffer was released by the request
           * execution, so do not hand it out. */
          bd = NULL;
        }
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_4);
        break;
    }

    rtems_bdbuf_check_read_ahead_trigger (dd, block);
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2120 
2121 void
2122 rtems_bdbuf_peek (rtems_disk_device *dd,
2123                   rtems_blkdev_bnum block,
2124                   uint32_t nr_blocks)
2125 {
2126   rtems_bdbuf_lock_cache ();
2127 
2128   if (bdbuf_cache.read_ahead_enabled && nr_blocks > 0)
2129   {
2130     rtems_bdbuf_read_ahead_reset(dd);
2131     dd->read_ahead.next = block;
2132     dd->read_ahead.nr_blocks = nr_blocks;
2133     rtems_bdbuf_read_ahead_add_to_chain(dd);
2134   }
2135 
2136   rtems_bdbuf_unlock_cache ();
2137 }
2138 
2139 static rtems_status_code
2140 rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2141 {
2142   if (bd == NULL)
2143     return RTEMS_INVALID_ADDRESS;
2144   if (rtems_bdbuf_tracer)
2145   {
2146     printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2147     rtems_bdbuf_show_users (kind, bd);
2148   }
2149   rtems_bdbuf_lock_cache();
2150 
2151   return RTEMS_SUCCESSFUL;
2152 }
2153 
2154 rtems_status_code
2155 rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2156 {
2157   rtems_status_code sc = RTEMS_SUCCESSFUL;
2158 
2159   sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2160   if (sc != RTEMS_SUCCESSFUL)
2161     return sc;
2162 
2163   switch (bd->state)
2164   {
2165     case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2166       rtems_bdbuf_add_to_lru_list_after_access (bd);
2167       break;
2168     case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2169     case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2170       rtems_bdbuf_discard_buffer_after_access (bd);
2171       break;
2172     case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2173       rtems_bdbuf_add_to_modified_list_after_access (bd);
2174       break;
2175     default:
2176       rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_0);
2177       break;
2178   }
2179 
2180   if (rtems_bdbuf_tracer)
2181     rtems_bdbuf_show_usage ();
2182 
2183   rtems_bdbuf_unlock_cache ();
2184 
2185   return RTEMS_SUCCESSFUL;
2186 }
2187 
2188 rtems_status_code
2189 rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2190 {
2191   rtems_status_code sc = RTEMS_SUCCESSFUL;
2192 
2193   sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2194   if (sc != RTEMS_SUCCESSFUL)
2195     return sc;
2196 
2197   switch (bd->state)
2198   {
2199     case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2200     case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2201     case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2202       rtems_bdbuf_add_to_modified_list_after_access (bd);
2203       break;
2204     case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2205       rtems_bdbuf_discard_buffer_after_access (bd);
2206       break;
2207     default:
2208       rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_6);
2209       break;
2210   }
2211 
2212   if (rtems_bdbuf_tracer)
2213     rtems_bdbuf_show_usage ();
2214 
2215   rtems_bdbuf_unlock_cache ();
2216 
2217   return RTEMS_SUCCESSFUL;
2218 }
2219 
2220 rtems_status_code
2221 rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2222 {
2223   rtems_status_code sc = RTEMS_SUCCESSFUL;
2224 
2225   sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2226   if (sc != RTEMS_SUCCESSFUL)
2227     return sc;
2228 
2229   switch (bd->state)
2230   {
2231     case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2232     case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2233     case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2234       rtems_bdbuf_sync_after_access (bd);
2235       break;
2236     case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2237       rtems_bdbuf_discard_buffer_after_access (bd);
2238       break;
2239     default:
2240       rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_5);
2241       break;
2242   }
2243 
2244   if (rtems_bdbuf_tracer)
2245     rtems_bdbuf_show_usage ();
2246 
2247   rtems_bdbuf_unlock_cache ();
2248 
2249   return RTEMS_SUCCESSFUL;
2250 }
2251 
2252 rtems_status_code
2253 rtems_bdbuf_syncdev (rtems_disk_device *dd)
2254 {
2255   if (rtems_bdbuf_tracer)
2256     printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);
2257 
2258   /*
2259    * Take the sync lock before locking the cache. Once we have the sync lock we
2260    * can lock the cache. If another thread has the sync lock it will cause this
2261    * thread to block until it owns the sync lock then it can own the cache. The
2262    * sync lock can only be obtained with the cache unlocked.
2263    */
2264   rtems_bdbuf_lock_sync ();
2265   rtems_bdbuf_lock_cache ();
2266 
2267   /*
2268    * Set the cache to have a sync active for a specific device and let the swap
2269    * out task know the id of the requester to wake when done.
2270    *
2271    * The swap out task will negate the sync active flag when no more buffers
2272    * for the device are held on the "modified for sync" queues.
2273    */
2274   bdbuf_cache.sync_active    = true;
2275   bdbuf_cache.sync_requester = rtems_task_self ();
2276   bdbuf_cache.sync_device    = dd;
2277 
2278   rtems_bdbuf_wake_swapper ();
2279   rtems_bdbuf_unlock_cache ();
2280   rtems_bdbuf_wait_for_transient_event ();
2281   rtems_bdbuf_unlock_sync ();
2282 
2283   return RTEMS_SUCCESSFUL;
2284 }
2285 
/**
 * Swapout transfer to the driver. The driver will break this I/O into groups
 * of consecutive write requests if multiple consecutive buffers are required
 * by the driver. The cache is not locked.
 *
 * @param transfer The transfer transaction.
 */
static void
rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
{
  rtems_chain_node *node;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);

  /*
   * If there are buffers to transfer to the media transfer them.
   */
  if (!rtems_chain_is_empty (&transfer->bds))
  {
    /*
     * The last block number used when the driver only supports
     * continuous blocks in a single request.
     */
    uint32_t last_block = 0;

    rtems_disk_device *dd = transfer->dd;
    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
    bool need_continuous_blocks =
      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;

    /*
     * Take as many buffers as configured and pass to the driver. Note, the
     * API to the drivers has an array of buffers and if a chain was passed
     * we could have just passed the list. If the driver API is updated it
     * should be possible to make this change with little effect in this
     * code. The array that is passed is broken in design and should be
     * removed. Merging members of a struct into the first member is
     * trouble waiting to happen.
     */
    transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
    transfer->write_req.bufnum = 0;

    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
    {
      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
      bool                write = false;

      /*
       * If the device only accepts sequential buffers and this is not the
       * first buffer (the first is always sequential), and the buffer is not
       * sequential, then put the buffer back on the transfer chain and write
       * the committed buffers.
       */

      if (rtems_bdbuf_tracer)
        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
                bd->block, transfer->write_req.bufnum,
                need_continuous_blocks ? "MULTI" : "SCAT");

      if (need_continuous_blocks && transfer->write_req.bufnum &&
          bd->block != last_block + media_blocks_per_block)
      {
        /* Discontinuity: requeue this buffer and flush what we have. */
        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
        write = true;
      }
      else
      {
        /* Append the buffer to the scatter/gather list of the request. */
        rtems_blkdev_sg_buffer* buf;
        buf = &transfer->write_req.bufs[transfer->write_req.bufnum];
        transfer->write_req.bufnum++;
        buf->user   = bd;
        buf->block  = bd->block;
        buf->length = dd->block_size;
        buf->buffer = bd->buffer;
        last_block  = bd->block;
      }

      /*
       * Perform the transfer if there are no more buffers, or the transfer
       * size has reached the configured max. value.
       */

      if (rtems_chain_is_empty (&transfer->bds) ||
          (transfer->write_req.bufnum >= bdbuf_config.max_write_blocks))
        write = true;

      if (write)
      {
        rtems_bdbuf_execute_transfer_request (dd, &transfer->write_req, false);

        /* Reset the request for the next batch of buffers. */
        transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
        transfer->write_req.bufnum = 0;
      }
    }

    /*
     * If sync'ing and the device is capable of handling a sync IO control
     * call, perform the call.
     */
    if (transfer->syncing &&
        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
    {
      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
      /* How should the error be handled ? */
    }
  }
}
2394 
/**
 * Process the modified list of buffers. There is a sync or modified list that
 * needs to be handled so we have a common function to do the work.
 *
 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
 * device is selected so select the device of the first buffer to be written to
 * disk.
 * @param chain The modified chain to process.
 * @param transfer The chain to append buffers to be written too.
 * @param sync_active If true this is a sync operation so expire all timers.
 * @param update_timers If true update the timers.
 * @param timer_delta If update_timers is true update the timers by this
 *                    amount.
 */
static void
rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
                                         rtems_chain_control* chain,
                                         rtems_chain_control* transfer,
                                         bool                 sync_active,
                                         bool                 update_timers,
                                         uint32_t             timer_delta)
{
  if (!rtems_chain_is_empty (chain))
  {
    rtems_chain_node* node = rtems_chain_head (chain);
    bool              sync_all;

    node = node->next;

    /*
     * A sync active with no valid dev means sync all.
     */
    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
      sync_all = true;
    else
      sync_all = false;

    while (!rtems_chain_is_tail (chain, node))
    {
      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;

      /*
       * Check if the buffer's hold timer has reached 0. If a sync is active
       * or someone waits for a buffer written force all the timers to 0.
       *
       * @note Lots of sync requests will skew this timer. It should be based
       *       on TOD to be accurate. Does it matter ?
       */
      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
          || rtems_bdbuf_has_buffer_waiters ())
        bd->hold_timer = 0;

      if (bd->hold_timer)
      {
        if (update_timers)
        {
          if (bd->hold_timer > timer_delta)
            bd->hold_timer -= timer_delta;
          else
            bd->hold_timer = 0;
        }

        if (bd->hold_timer)
        {
          /* Timer still running: keep the buffer on the modified list. */
          node = node->next;
          continue;
        }
      }

      /*
       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
       * assumption. Cannot use the transfer list being empty the sync dev
       * calls sets the dev to use.
       */
      if (*dd_ptr == BDBUF_INVALID_DEV)
        *dd_ptr = bd->dd;

      if (bd->dd == *dd_ptr)
      {
        rtems_chain_node* next_node = node->next;
        rtems_chain_node* tnode = rtems_chain_tail (transfer);

        /*
         * The blocks on the transfer list are sorted in block order. This
         * means multi-block transfers for drivers that require consecutive
         * blocks perform better with sorted blocks and for real disks it may
         * help lower head movement.
         */

        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

        rtems_chain_extract_unprotected (node);

        tnode = tnode->previous;

        /*
         * Insertion sort from the tail of the transfer list.  Buffers tend
         * to arrive in roughly ascending block order, so this scan is
         * usually short.  node is set to NULL once inserted.
         */
        while (node && !rtems_chain_is_head (transfer, tnode))
        {
          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;

          if (bd->block > tbd->block)
          {
            rtems_chain_insert_unprotected (tnode, node);
            node = NULL;
          }
          else
            tnode = tnode->previous;
        }

        /* Smallest block so far: it belongs at the front. */
        if (node)
          rtems_chain_prepend_unprotected (transfer, node);

        node = next_node;
      }
      else
      {
        /* Buffer belongs to another device: leave it for a later pass. */
        node = node->next;
      }
    }
  }
}
2515 
/**
 * Process the cache's modified buffers. Check the sync list first then the
 * modified list extracting the buffers suitable to be written to disk. We have
 * a device at a time. The task level loop will repeat this operation while
 * there are buffers to be written. If the transfer fails place the buffers
 * back on the modified list and try again later. The cache is unlocked while
 * the buffers are being written to disk.
 *
 * @param timer_delta If update_timers is true update the timers by this
 *                    amount.
 * @param update_timers If true update the timers.
 * @param transfer The transfer transaction data.
 *
 * @retval true Buffers were written to disk so scan again.
 * @retval false No buffers were written to disk.
 */
static bool
rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
                                bool                          update_timers,
                                rtems_bdbuf_swapout_transfer* transfer)
{
  rtems_bdbuf_swapout_worker* worker;
  bool                        transfered_buffers = false;
  bool                        sync_active;

  rtems_bdbuf_lock_cache ();

  /*
   * To set this to true you need the cache and the sync lock.
   */
  sync_active = bdbuf_cache.sync_active;

  /*
   * If a sync is active do not use a worker because the current code does not
   * cleaning up after. We need to know the buffers have been written when
   * syncing to release sync lock and currently worker threads do not return to
   * here. We do not know the worker is the last in a sequence of sync writes
   * until after we have it running so we do not know to tell it to release the
   * lock. The simplest solution is to get the main swap out task perform all
   * sync operations.
   */
  if (sync_active)
    worker = NULL;
  else
  {
    /* Hand the write off to an idle worker if one is available. */
    worker = (rtems_bdbuf_swapout_worker*)
      rtems_chain_get_unprotected (&bdbuf_cache.swapout_free_workers);
    if (worker)
      transfer = &worker->transfer;
  }

  rtems_chain_initialize_empty (&transfer->bds);
  transfer->dd = BDBUF_INVALID_DEV;
  transfer->syncing = sync_active;

  /*
   * When the sync is for a device limit the sync to that device. If the sync
   * is for a buffer handle process the devices in the order on the sync
   * list. This means the dev is BDBUF_INVALID_DEV.
   */
  if (sync_active)
    transfer->dd = bdbuf_cache.sync_device;

  /*
   * If we have any buffers in the sync queue move them to the modified
   * list. The first sync buffer will select the device we use.
   */
  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
                                           &bdbuf_cache.sync,
                                           &transfer->bds,
                                           true, false,
                                           timer_delta);

  /*
   * Process the cache's modified list.
   */
  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
                                           &bdbuf_cache.modified,
                                           &transfer->bds,
                                           sync_active,
                                           update_timers,
                                           timer_delta);

  /*
   * We have all the buffers that have been modified for this device so the
   * cache can be unlocked because the state of each buffer has been set to
   * TRANSFER.
   */
  rtems_bdbuf_unlock_cache ();

  /*
   * If there are buffers to transfer to the media transfer them.
   */
  if (!rtems_chain_is_empty (&transfer->bds))
  {
    if (worker)
    {
      /* Wake the worker; it writes from its own transfer structure. */
      rtems_status_code sc = rtems_event_send (worker->id,
                                               RTEMS_BDBUF_SWAPOUT_SYNC);
      if (sc != RTEMS_SUCCESSFUL)
        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_2);
    }
    else
    {
      rtems_bdbuf_swapout_write (transfer);
    }

    transfered_buffers = true;
  }

  /* Sync complete: nothing left to write, so wake the sync requester. */
  if (sync_active && !transfered_buffers)
  {
    rtems_id sync_requester;
    rtems_bdbuf_lock_cache ();
    sync_requester = bdbuf_cache.sync_requester;
    bdbuf_cache.sync_active = false;
    bdbuf_cache.sync_requester = 0;
    rtems_bdbuf_unlock_cache ();
    if (sync_requester)
      rtems_event_transient_send (sync_requester);
  }

  return transfered_buffers;
}
2640 
2641 /**
2642  * The swapout worker thread body.
2643  *
2644  * @param arg A pointer to the worker thread's private data.
2645  * @return rtems_task Not used.
2646  */
2647 static rtems_task
2648 rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2649 {
2650   rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2651 
2652   while (worker->enabled)
2653   {
2654     rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2655 
2656     rtems_bdbuf_swapout_write (&worker->transfer);
2657 
2658     rtems_bdbuf_lock_cache ();
2659 
2660     rtems_chain_initialize_empty (&worker->transfer.bds);
2661     worker->transfer.dd = BDBUF_INVALID_DEV;
2662 
2663     rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
2664 
2665     rtems_bdbuf_unlock_cache ();
2666   }
2667 
2668   free (worker);
2669 
2670   rtems_task_exit();
2671 }
2672 
2673 /**
2674  * Close the swapout worker threads.
2675  */
2676 static void
2677 rtems_bdbuf_swapout_workers_close (void)
2678 {
2679   rtems_chain_node* node;
2680 
2681   rtems_bdbuf_lock_cache ();
2682 
2683   node = rtems_chain_first (&bdbuf_cache.swapout_free_workers);
2684   while (!rtems_chain_is_tail (&bdbuf_cache.swapout_free_workers, node))
2685   {
2686     rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2687     worker->enabled = false;
2688     rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2689     node = rtems_chain_next (node);
2690   }
2691 
2692   rtems_bdbuf_unlock_cache ();
2693 }
2694 
/**
 * Body of task which takes care on flushing modified buffers to the disk.
 *
 * @param arg A pointer to the global cache data. Use the global variable and
 *            not this.
 * @return rtems_task Not used.
 */
static rtems_task
rtems_bdbuf_swapout_task (rtems_task_argument arg)
{
  rtems_bdbuf_swapout_transfer* transfer = (rtems_bdbuf_swapout_transfer *) arg;
  uint32_t                      period_in_ticks;
  const uint32_t                period_in_msecs = bdbuf_config.swapout_period;
  uint32_t                      timer_delta;

  /*
   * Localise the period.
   */
  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);

  /*
   * This is temporary. Needs to be changed to use the real time clock.
   */
  timer_delta = period_in_msecs;

  while (bdbuf_cache.swapout_enabled)
  {
    rtems_event_set   out;
    rtems_status_code sc;

    /*
     * Only update the timers once in the processing cycle.
     */
    bool update_timers = true;

    /*
     * If we write buffers to any disk perform a check again. We only write a
     * single device at a time and the cache may have more than one device's
     * buffers modified waiting to be written.
     */
    bool transfered_buffers;

    do
    {
      transfered_buffers = false;

      /*
       * Extract all the buffers we find for a specific device. The device is
       * the first one we find on a modified list. Process the sync queue of
       * buffers first.
       */
      if (rtems_bdbuf_swapout_processing (timer_delta,
                                          update_timers,
                                          transfer))
      {
        transfered_buffers = true;
      }

      /*
       * Only update the timers once.
       */
      update_timers = false;
    }
    while (transfered_buffers);

    /* Sleep until the next period or until an explicit wake-up arrives;
     * a timeout just means there was nothing to write. */
    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
                              RTEMS_EVENT_ALL | RTEMS_WAIT,
                              period_in_ticks,
                              &out);

    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SWAPOUT_RE);
  }

  /* Shutdown: stop the workers, release the transfer and exit. */
  rtems_bdbuf_swapout_workers_close ();

  free (transfer);

  rtems_task_exit();
}
2775 
2776 static void
2777 rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2778 {
2779   bool wake_buffer_waiters = false;
2780   rtems_chain_node *node = NULL;
2781 
2782   while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2783   {
2784     rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2785 
2786     if (bd->waiters == 0)
2787       wake_buffer_waiters = true;
2788 
2789     rtems_bdbuf_discard_buffer (bd);
2790   }
2791 
2792   if (wake_buffer_waiters)
2793     rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2794 }
2795 
/**
 * Walk the whole buffer AVL tree and collect the buffers of @a dd onto the
 * purge list.  Buffers currently in transfer or accessed by another thread
 * cannot be removed here; they are flagged as purged via their state and are
 * dealt with when the transfer or access finishes.  Must be called with the
 * cache locked.
 *
 * The traversal is an iterative pre-order walk using an explicit parent
 * stack (bounded by the maximum AVL height) instead of recursion.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  /* Stack sentinel: NULL marks the bottom. */
  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          /* Nothing to do: no data or already marked purged. */
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Idle buffer: move it straight to the purge list. */
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          /* In flight: mark purged so it is discarded after the transfer. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          /* Held by a user: mark purged so it is discarded on release. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      /* Leaf: pop ancestors until one has an unvisited right subtree. */
      while (*prev != NULL
             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2872 
2873 static void
2874 rtems_bdbuf_do_purge_dev (rtems_disk_device *dd)
2875 {
2876   rtems_chain_control purge_list;
2877 
2878   rtems_chain_initialize_empty (&purge_list);
2879   rtems_bdbuf_read_ahead_reset (dd);
2880   rtems_bdbuf_gather_for_purge (&purge_list, dd);
2881   rtems_bdbuf_purge_list (&purge_list);
2882 }
2883 
2884 void
2885 rtems_bdbuf_purge_dev (rtems_disk_device *dd)
2886 {
2887   rtems_bdbuf_lock_cache ();
2888   rtems_bdbuf_do_purge_dev (dd);
2889   rtems_bdbuf_unlock_cache ();
2890 }
2891 
2892 rtems_status_code
2893 rtems_bdbuf_set_block_size (rtems_disk_device *dd,
2894                             uint32_t           block_size,
2895                             bool               sync)
2896 {
2897   rtems_status_code sc = RTEMS_SUCCESSFUL;
2898 
2899   /*
2900    * We do not care about the synchronization status since we will purge the
2901    * device later.
2902    */
2903   if (sync)
2904     (void) rtems_bdbuf_syncdev (dd);
2905 
2906   rtems_bdbuf_lock_cache ();
2907 
2908   if (block_size > 0)
2909   {
2910     size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
2911 
2912     if (bds_per_group != 0)
2913     {
2914       int block_to_media_block_shift = 0;
2915       uint32_t media_blocks_per_block = block_size / dd->media_block_size;
2916       uint32_t one = 1;
2917 
2918       while ((one << block_to_media_block_shift) < media_blocks_per_block)
2919       {
2920         ++block_to_media_block_shift;
2921       }
2922 
2923       if ((dd->media_block_size << block_to_media_block_shift) != block_size)
2924         block_to_media_block_shift = -1;
2925 
2926       dd->block_size = block_size;
2927       dd->block_count = dd->size / media_blocks_per_block;
2928       dd->media_blocks_per_block = media_blocks_per_block;
2929       dd->block_to_media_block_shift = block_to_media_block_shift;
2930       dd->bds_per_group = bds_per_group;
2931 
2932       rtems_bdbuf_do_purge_dev (dd);
2933     }
2934     else
2935     {
2936       sc = RTEMS_INVALID_NUMBER;
2937     }
2938   }
2939   else
2940   {
2941     sc = RTEMS_INVALID_NUMBER;
2942   }
2943 
2944   rtems_bdbuf_unlock_cache ();
2945 
2946   return sc;
2947 }
2948 
/**
 * Body of the read-ahead task.  It sleeps until woken, then drains the
 * read-ahead chain: for each queued device it fetches a run of upcoming
 * blocks into the cache and, for automatic read-ahead, re-arms the trigger
 * half way into the freshly read window so streaming reads keep it running.
 * The cache is locked while requests are processed.
 */
static rtems_task
rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
{
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  while (bdbuf_cache.read_ahead_enabled)
  {
    rtems_chain_node *node;

    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    rtems_bdbuf_lock_cache ();

    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
    {
      rtems_disk_device *dd =
        RTEMS_CONTAINER_OF (node, rtems_disk_device, read_ahead.node);
      rtems_blkdev_bnum block = dd->read_ahead.next;
      rtems_blkdev_bnum media_block = 0;
      rtems_status_code sc =
        rtems_bdbuf_get_media_block (dd, block, &media_block);

      /* The device is dequeued; mark its node so it can be queued again. */
      rtems_chain_set_off_chain (&dd->read_ahead.node);

      if (sc == RTEMS_SUCCESSFUL)
      {
        rtems_bdbuf_buffer *bd =
          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

        /* bd is NULL when the block is already cached; nothing to do. */
        if (bd != NULL)
        {
          uint32_t transfer_count = dd->read_ahead.nr_blocks;
          uint32_t blocks_until_end_of_disk = dd->block_count - block;
          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;

          if (transfer_count == RTEMS_DISK_READ_AHEAD_SIZE_AUTO) {
            /* Automatic mode: read up to the configured maximum and re-arm
             * the trigger in the middle of the new window. */
            transfer_count = blocks_until_end_of_disk;

            if (transfer_count >= max_transfer_count)
            {
              transfer_count = max_transfer_count;
              dd->read_ahead.trigger = block + transfer_count / 2;
              dd->read_ahead.next = block + transfer_count;
            }
            else
            {
              /* The rest of the disk fits in one transfer: stop here. */
              dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
            }
          } else {
            /* Explicit peek request: clamp to the end of the disk and the
             * configured maximum. */
            if (transfer_count > blocks_until_end_of_disk) {
              transfer_count = blocks_until_end_of_disk;
            }

            if (transfer_count > max_transfer_count) {
              transfer_count = max_transfer_count;
            }

            ++dd->stats.read_ahead_peeks;
          }

          ++dd->stats.read_ahead_transfers;
          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
        }
      }
      else
      {
        /* The next block is off the end of the device: disarm. */
        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
      }
    }

    rtems_bdbuf_unlock_cache ();
  }

  rtems_task_exit();
}
3023 
3024 void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
3025                                    rtems_blkdev_stats      *stats)
3026 {
3027   rtems_bdbuf_lock_cache ();
3028   *stats = dd->stats;
3029   rtems_bdbuf_unlock_cache ();
3030 }
3031 
3032 void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
3033 {
3034   rtems_bdbuf_lock_cache ();
3035   memset (&dd->stats, 0, sizeof(dd->stats));
3036   rtems_bdbuf_unlock_cache ();
3037 }