Argonne National Laboratory

Experimental Physics and
Industrial Control System

1994  1995  1996  1997  1998  1999  2000  2001  2002  2003  2004  2005  2006  2007  2008  2009  <20102011  2012  2013  2014  2015  2016  2017  Index 1994  1995  1996  1997  1998  1999  2000  2001  2002  2003  2004  2005  2006  2007  2008  2009  <20102011  2012  2013  2014  2015  2016  2017 
<== Date ==> <== Thread ==>

Subject: Re: epicsEvent::invalidSemaphore exception in timerQueue
From: Daron Chabot <daronchabot@gmail.com>
To: Dirk Zimoch <dirk.zimoch@psi.ch>
Cc: ebner <simon.ebner@psi.ch>, EPICS <tech-talk@aps.anl.gov>
Date: Thu, 6 May 2010 10:30:02 -0400
Hi Dirk,

Anytime I've seen an error like this on a real-time OS, it was always because a trashed stack or blown array has nuked the mutex ID (or message-queue ID, etc).

I've been able to track these down by using GNU binutils to see how symbols are laid out in memory. In my last instance of this problem, I noticed that the problematic mutex ID immediately followed an array in my binary's BSS code section. A quick review of the code populating that array showed that I was writing past the end of the array and trashing the mutex's ID...

HTH


-- dc



On Thu, May 6, 2010 at 9:51 AM, Dirk Zimoch <dirk.zimoch@psi.ch> wrote:
Hi all,

We are having a strange problem where a TimerQueue task gets suspended because of a invalidSemaphore exception and we can't wind out where.

We are using EPICS 3.14.8 on vxWorks.

Here is the error message:

0x1fb87960 (timerQueue): Unhandled C++ exception resulted in call to terminate
epicsThread: Unexpected C++ exception "epicsEvent::invalidSemaphore()" with type "Q210epicsEvent16invalidSemaphore" in thread "timerQueue" at THU MAY 06 2010 10:19:21.310783060

The dead task:
timerQueue a94c08       1fb87960 148 SUSPEND      23d410 1fb87470  3d0002     0

And the stack trace:
tt 0x1fb87960
243c24 vxTaskEntry    +68 : a94c08 (&epicsThreadCallEntryPoint, 1f9d24fc)
a94c84 epicsThreadOnceOsd+174: a7b230 ()
a7b230 epicsThreadCallEntryPoint+5c8: __cp_exception_info ()
15e2fc __cp_exception_info+0  : __default_unexpected(void) ()
15e2b8 set_terminate(void (*)(void))+0  : terminate(void) ()
15e2a8 __default_unexpected(void)+0  : cplusTerminate(void) ()
15d068 cplusTerminate(void)+50 : taskSuspend ()

I found out that epicsEvent::invalidSemaphore is thrown by epicsEvent::wait() and its variants when semTake failes for other reasons than timeout (which can only be an invalid SEM_ID).
Unfortunately the stack trace is not very helpful to find out where the actual error happened (thanks to the C++ exception mechanism).

The error happens when (shortly after) the attached SNL program finishes the entry block of state "active". We are using seq version 2.0.10.

Any idea how to find out where the problem really is? Who might own the timerQueue? What corrupted epicsEvent the timerQueue might wait for? How the epicsEvent semaphore might got corrupted?

Dirk

/*
 * $Header: /cvs/G/OTFSCAN/src/OTFScanBase/otfScanLoop.st,v 1.21 2010/05/04 11:34:01 ebner Exp $
 *
 * otfScanLoop.st
 * ============
 *
 * SNL program performing a scan
 *
 * Usage:
 * seq &otfScanLoop, "cprefix=<pv-prefix>"
 *
 * Example:
 * seq &otfScanLoop, "cprefix=MTEST-HW3-OTFX"
 */

program otfScanLoop ("cprefix=ERROR")

%% #include <taskLib.h>
%% #include <sysLib.h>
%% #include <string.h>

/*Connect timeout in seconds*/
int timeout = 8;
int timeoutCCode = 8;
int waitDMOV = 4;

string ca_uStatus;
assign ca_uStatus to "{cprefix}:USTAT";
monitor ca_uStatus;

string ca_uCommand;
assign ca_uCommand to "{cprefix}:UCOM";
monitor ca_uCommand;

string ca_uMotor;
assign ca_uMotor to "{cprefix}:UMOT";
monitor ca_uMotor;

double ca_cBegin;
assign ca_cBegin to "{cprefix}:CBEG";
monitor ca_cBegin;

double ca_cEnd;
assign ca_cEnd to "{cprefix}:CEND";
monitor ca_cEnd;

double ca_cVelocity;
assign ca_cVelocity to "{cprefix}:CMVEL";
monitor ca_cVelocity;

string ca_status;
assign ca_status to "{cprefix}:STATUS";
monitor ca_status;

int ca_scanRunning;
assign ca_scanRunning to "{cprefix}:SCRU";

double ca_sStartScaler;
assign ca_sStartScaler to "{cprefix}:SSTART.PROC";

double ca_sStopScaler;
assign ca_sStopScaler to "{cprefix}:SSTOP.PROC";

/* Writing a 0 to :FCNT.B will process the record and increment the counter */
double ca_fCounter;
assign ca_fCounter to "{cprefix}:FCNT.B";

/* Zig/Zag scan specific*/
double ca_uBegin;
assign ca_uBegin to "{cprefix}:UBEG";

double ca_uEnd;
assign ca_uEnd to "{cprefix}:UEND";

int ca_uZigZag;
assign ca_uZigZag to "{cprefix}:UZIGZ";

int ca_mUseMinVelo;
assign ca_mUseMinVelo to "{cprefix}:UMMIN";
monitor ca_mUseMinVelo;

/* Error Message */
string ca_message;
assign ca_message to "{cprefix}:MSG";

double ca_motor;
assign ca_motor to "";

int ca_motorDone;
assign ca_motorDone to "";
monitor ca_motorDone;

double ca_motorVelo;
assign ca_motorVelo to "";

double ca_motorBdst;
assign ca_motorBdst to "";

double ca_motorVbas;
assign ca_motorVbas to "";

double ca_motorStop;
assign ca_motorStop to "";

char motor[128];
char tmpPvName[128];

double tmpValueDouble;
assign tmpValueDouble to "";

double backupBacklash;
double backupVelocity;
double backupMinVelocity;

ss otfScanLoop {

       state setup {
               when( pvAssignCount() == pvConnectCount() ){
               } state inactive
       }

       state inactive {

               entry{
                       strcpy (ca_uStatus, "INACTIVE");
               pvPut(ca_uStatus);

               ca_scanRunning = 0;
                       pvPut(ca_scanRunning);
               }

               when(strcmp (ca_uCommand, "START") == 0){

                       ca_scanRunning = 1;
                       pvPut(ca_scanRunning);

                       /* Set user command to none - statement is here because of timing issues! */
                       strcpy (ca_uCommand, "NONE");
               pvPut(ca_uCommand);

               /* Determine specified motor */
               pvGet (ca_uMotor);

                       if(strcmp (motor, ca_uMotor) != 0){
                               strcpy(motor, ca_uMotor);

                               printf("[Inactive] Connect motor channels of motor %s\n",motor);
                               sprintf(tmpPvName,  "%s.VAL",  motor);
                               pvAssign(ca_motor, tmpPvName);

                       sprintf(tmpPvName,  "%s.DMOV",  motor);
                       pvStopMonitor(ca_motorDone);
                               pvAssign(ca_motorDone, tmpPvName);
                               pvMonitor (ca_motorDone);

                       sprintf(tmpPvName,  "%s.VELO",  motor);
                               pvAssign(ca_motorVelo, tmpPvName);

                       sprintf(tmpPvName,  "%s.BDST",  motor);
                               pvAssign(ca_motorBdst, tmpPvName);

                       sprintf(tmpPvName,  "%s.VBAS",  motor);
                               pvAssign(ca_motorVbas, tmpPvName);

                       sprintf(tmpPvName,  "%s.STOP",  motor);
                               pvAssign(ca_motorStop, tmpPvName);
                       }
               } state connect
       }

       state connect{
               when( pvAssignCount() == pvConnectCount() ){
                       printf("[Connect] All motor channels are connected\n");
               } state initialize

               when(delay(timeout)){
                       printf("[Connect] Cannot connect motor channels\n");
               } state fault
       }

       state initialize {

               entry{
                       strcpy (ca_uStatus, "INITIALIZE");
               pvPut(ca_uStatus);

               /* Move motor to start position */
               pvGet(ca_cBegin);
               ca_motor = ca_cBegin;
               printf("[Initialize] Move motor to start position: %f \n", ca_motor);
               pvPut(ca_motor);
               pvFlush();

               /* Ensure that DMOV was set to 0 */
               taskDelay((sysClkRateGet()*waitDMOV));
               }

               when (strcmp (ca_uCommand, "STOP") == 0) {
                       printf("[Initialize] Stop command issued\n");

                       strcpy (ca_uCommand, "NONE");
               pvPut(ca_uCommand);

               printf("[Initialize] Stop motor\n");
                       pvGet(ca_motorStop);
                       ca_motorStop = 1;
                       pvPut(ca_motorStop);

               } state inactive

               when(ca_motorDone == 1){
                       printf("[Initialize] Motor reached start position\n");

                       printf("[Initialize] Initialize C code\n");
                       strcpy (ca_status, "INITIALIZE");
                       pvPut(ca_status);
               } state waitCActive
       }


       state waitCActive {
               when(strcmp (ca_status, "ACTIVE") == 0){
                       printf("[Wait C Active] C code is active\n");
               } state active

               when(delay(timeoutCCode)){
                       printf("[Wait C Active] C code was not activated within %d seconds\n", timeoutCCode);
               } state fault
       }

       state active {

               entry{
                       strcpy (ca_uStatus, "ACTIVE");
               pvPut(ca_uStatus);

                       printf("[Active] Backup and set motor settings for scan\n");

                       /* Set motor base velocity to 0 if use min flag is 0 */
                       if(ca_mUseMinVelo == 0){
                               pvGet(ca_motorVbas);
                               backupMinVelocity = ca_motorVbas;
                               ca_motorVbas = 0;
                               pvPut(ca_motorVbas);
                       }

                       /* Backup and set motor velocity */
                       pvGet(ca_motorVelo);
                       backupVelocity = ca_motorVelo;
                       pvGet(ca_cVelocity);
                       ca_motorVelo = ca_cVelocity;
                       pvPut(ca_motorVelo);

                       /* Backup and set motor backlash */
                       pvGet(ca_motorBdst);
                       backupBacklash = ca_motorBdst;
                       ca_motorBdst = 0;
                       pvPut(ca_motorBdst);

                       printf("[Active] Set and backuped motor settings: velocity=%f/%f backlash=%f/%f\n", ca_motorVelo, backupVelocity, ca_motorBdst, backupBacklash);

                       printf("[Active] Start scaler\n");
                       ca_sStartScaler = 1;
                       pvPut(ca_sStartScaler);

                       pvGet(ca_cEnd);
                       ca_motor = ca_cEnd;
                       printf("[Active] Move motor to end position: %f\n",ca_cEnd);
                       pvPut(ca_motor);
                       pvFlush();

               /* Ensure that DMOV was set to 0 */
               taskDelay((sysClkRateGet()*waitDMOV));
               }

               when (strcmp (ca_status, "FAULT") == 0 || strcmp (ca_status, "ERROR") == 0 || strcmp (ca_status, "INITIALIZE") == 0) {
                       printf("[Active] An error occurred in the C code\n");
               } state fault

               when(ca_motorDone == 1) {
                       printf("[Active] Motor reached end position\n");
               } state stop

               when (strcmp (ca_uCommand, "STOP") == 0) {
                       printf("[Active] Stop command issued\n");

                       strcpy (ca_uCommand, "NONE");
               pvPut(ca_uCommand);

               printf("[Active] Stop motor\n");
                       pvGet(ca_motorStop);
                       ca_motorStop = 1;
                       pvPut(ca_motorStop);

               printf("[Active] Terminate C scan logic\n");
                       strcpy (ca_status, "STOP");
                       pvPut(ca_status);
               } state stop

               exit{
                       printf("[Active] Stop scaler\n");
                       ca_sStopScaler = 1;
                       pvPut(ca_sStopScaler);

                       printf("[Active] Restore motor settings: velocity=%f backlash=%f\n", backupVelocity, backupBacklash);

                       /* Restore motor base velocity to 0 if use min flag is 0 */
                       if(ca_mUseMinVelo == 0){
                               pvGet(ca_motorVbas);
                               ca_motorVbas = backupMinVelocity;
                               pvPut(ca_motorVbas);
                       }
                       /* Restore motor velocity */
                       pvGet(ca_motorVelo);
                       ca_motorVelo = backupVelocity;
                       pvPut(ca_motorVelo);

                       /* Restore motor velocity */
                       pvGet(ca_motorBdst);
                       ca_motorBdst = backupBacklash;
                       pvPut(ca_motorBdst);

                       /* Increase file counter */
                       ca_fCounter = 0;
                       pvPut(ca_fCounter);

                       /* Zig/Zag scan for multi dimensional scans */
                       pvGet(ca_uZigZag);
                       if(ca_uZigZag == 1){
                               printf("[Active] Zig/Zag - Switch begin and end position\n");
                               pvGet(ca_uBegin);
                               tmpValueDouble = ca_uBegin;
                               pvGet(ca_uEnd);
                               ca_uBegin = ca_uEnd;
                               ca_uEnd = tmpValueDouble;
                               pvPut(ca_uBegin);
                               pvPut(ca_uEnd);
                       }
               }
       }

       state stop{
               entry{
                       strcpy (ca_uStatus, "STOP");
               pvPut(ca_uStatus);
               }

               when(strcmp (ca_status, "INACTIVE") == 0){
                       printf("[Stop] C code is inactive\n");
               } state inactive

               when(delay(timeoutCCode)){
                       printf("[Stop] C code was not inactivated within %d seconds\n", timeoutCCode);
               } state fault
       }

       state fault {
               entry{
                       strcpy (ca_uStatus, "FAULT");
               pvPut(ca_uStatus);

               /* Write error message */
                       strcpy (ca_message, "SNL in fault state");
               pvPut(ca_message);

               ca_scanRunning = 0;
                       pvPut(ca_scanRunning);
               }

               when (strcmp (ca_status, "INACTIVE") == 0) {
                       printf("[Fault] Fault state cleared\n");

                       /* Set user command to NONE */
                       strcpy (ca_uCommand, "NONE");
               pvPut(ca_uCommand);

               } state inactive
       }
}


References:
epicsEvent::invalidSemaphore exception in timerQueue Dirk Zimoch

Navigate by Date:
Prev: epicsEvent::invalidSemaphore exception in timerQueue Dirk Zimoch
Next: RE: epicsEvent::invalidSemaphore exception in timerQueue Jeff Hill
Index: 1994  1995  1996  1997  1998  1999  2000  2001  2002  2003  2004  2005  2006  2007  2008  2009  <20102011  2012  2013  2014  2015  2016  2017 
Navigate by Thread:
Prev: epicsEvent::invalidSemaphore exception in timerQueue Dirk Zimoch
Next: RE: epicsEvent::invalidSemaphore exception in timerQueue Jeff Hill
Index: 1994  1995  1996  1997  1998  1999  2000  2001  2002  2003  2004  2005  2006  2007  2008  2009  <20102011  2012  2013  2014  2015  2016  2017 
ANJ, 02 Sep 2010 Valid HTML 4.01! · Home · News · About · Base · Modules · Extensions · Distributions · Download ·
· EPICS V4 · IRMIS · Talk · Bugs · Documents · Links · Licensing ·