3030 import os
3131 import threading
3232 import time
33+ import errno
3334 from sonic_py_common .logger import Logger
3435 from sonic_py_common .general import check_output_pipe
3536 from . import utils
207208
208209MAX_ATTEMPTS = 50
209210RETRY_SLEEP_SEC = 0.1
211+ EEPROM_RETRY_ERR_THRESHOLD = 10
210212
211213STATE_DOWN = 'Down' # Initial state
212214STATE_INIT = 'Initializing' # Module starts initializing, check module present, also power on the module if need
@@ -468,24 +470,44 @@ def wait_sfp_eeprom_ready(cls, sfp_list, wait_time):
468470
469471 def read_eeprom (self , offset , num_bytes , log_on_error = True ):
470472 """
471- Read eeprom specific bytes beginning from a random offset with size as num_bytes. Tries up to 50 times total on every 0.1s.
473+ Read eeprom specific bytes beginning from a random offset with size as num_bytes.
474+ Retries up to 50 times in total (every 0.1s), but only if previous attempts failed due to I2C errors
475+ (errno.EIO, typically reported as -5 from the kernel).
476+
472477 Returns:
473- bytearray, if raw sequence of bytes are read correctly from the offset of size num_bytes
474- None, if the read_eeprom fails
478+ bytearray: If the data was successfully read.
479+ None: If all attempts failed (whether due to I2C errors after max retries, or due to other errors without retry).
475480 """
476481 for attempt in range (MAX_ATTEMPTS ):
477- result = self ._read_eeprom (offset , num_bytes , log_on_error )
482+ result , err = self ._read_eeprom (offset , num_bytes , log_on_error )
478483 if result is not None :
479- logger .log_notice (
484+ logger .log_debug (
480485 f"EEPROM read success after attempt { attempt + 1 } /{ MAX_ATTEMPTS } "
481486 f"(sfp={ self .sdk_index } , offset={ offset } , size={ num_bytes } )" )
482487 return result
483- if attempt < MAX_ATTEMPTS - 1 : # only sleep if another retry will happen
484- logger .log_notice (
485- f"EEPROM read attempt { attempt + 1 } /{ MAX_ATTEMPTS } failed "
488+
489+ log_func = (logger .log_error if attempt + 1 > EEPROM_RETRY_ERR_THRESHOLD else logger .log_debug )
490+
491+ # Retry only on EIO (-5)
492+ if err == errno .EIO and attempt < MAX_ATTEMPTS - 1 :
493+ log_func (
494+ f"EEPROM read attempt { attempt + 1 } /{ MAX_ATTEMPTS } failed with I2C error "
486495 f"(sfp={ self .sdk_index } , offset={ offset } , size={ num_bytes } ). "
487496 f"Retrying in { RETRY_SLEEP_SEC :.1f} s..." )
488497 time .sleep (RETRY_SLEEP_SEC )
498+ continue
499+
500+ # Non-I2C-error or last attempt → stop retrying
501+ if err is not None :
502+ log_func (
503+ f"EEPROM read failed (errno={ err } , { os .strerror (err )} ) "
504+ f"after attempt { attempt + 1 } /{ MAX_ATTEMPTS } "
505+ f"(sfp={ self .sdk_index } , offset={ offset } , size={ num_bytes } )" )
506+ else :
507+ log_func (
508+ f"EEPROM read failed after attempt { attempt + 1 } /{ MAX_ATTEMPTS } "
509+ f"(sfp={ self .sdk_index } , offset={ offset } , size={ num_bytes } )" )
510+ return None
489511
490512 if log_on_error :
491513 logger .log_error (
@@ -503,26 +525,29 @@ def _read_eeprom(self, offset, num_bytes, log_on_error=True):
503525 log_on_error (bool, optional): whether log error when exception occurs. Defaults to True.
504526
505527 Returns:
506- bytearray: the content of EEPROM
528+ (bytearray, None): On success, returns the data read and None for errno.
529+ (None, errno): On failure due to a specific OS error, returns None and the errno value.
530+ (None, None): On failure without an associated errno (e.g., empty page or invalid page).
507531 """
508532 result = bytearray (0 )
509533 while num_bytes > 0 :
510534 _ , page , page_offset = self ._get_page_and_page_offset (offset )
511535 if not page :
512- return None
536+ return None , None
513537
514538 try :
515539 with open (page , mode = 'rb' , buffering = 0 ) as f :
516540 f .seek (page_offset )
517541 content = f .read (num_bytes )
542+ read_length = len (content )
543+ if read_length == 0 :
544+ logger .log_error (f'SFP { self .sdk_index } : EEPROM page { page } is empty, no data retrieved' )
545+ return None , None
546+
518547 if not result :
519548 result = content
520549 else :
521550 result += content
522- read_length = len (content )
523- if read_length == 0 :
524- logger .log_error (f'SFP { self .sdk_index } : EEPROM page { page } is empty, no data retrieved' )
525- return None
526551 num_bytes -= read_length
527552 if num_bytes > 0 :
528553 page_size = f .seek (0 , os .SEEK_END )
@@ -531,48 +556,76 @@ def _read_eeprom(self, offset, num_bytes, log_on_error=True):
531556 else :
532557 # Indicate read finished
533558 num_bytes = 0
559+
534560 if ctypes .get_errno () != 0 :
535- raise IOError ( f'errno = { os . strerror ( ctypes .get_errno ()) } ' )
561+ return None , ctypes .get_errno ()
536562
537563 logger .log_debug (
538564 f"read EEPROM sfp={ self .sdk_index } , page={ page } , page_offset={ page_offset } , "
539565 f"size={ read_length } , data={ content } "
540566 )
541567
542- except ( OSError , IOError ) as e :
568+ except OSError as e :
543569 if log_on_error :
544- logger .log_warning (
545- f"Failed to read sfp={ self .sdk_index } EEPROM page={ page } , page_offset={ page_offset } , "
546- f"size={ num_bytes } , offset={ offset } , error = { e } "
547- )
548- return None
570+ if e .errno is not None :
571+ logger .log_warning (
572+ f"Failed to read sfp={ self .sdk_index } EEPROM page={ page } , page_offset={ page_offset } , "
573+ f"size={ num_bytes } , offset={ offset } , error={ e } "
574+ f"(errno={ e .errno } , { os .strerror (e .errno )} )"
575+ )
576+ else :
577+ logger .log_warning (
578+ f"Failed to read sfp={ self .sdk_index } EEPROM page={ page } , page_offset={ page_offset } , "
579+ f"size={ num_bytes } , offset={ offset } , error={ e } "
580+ )
581+ return None , e .errno
582+
583+ return bytearray (result ), None
549584
550- return bytearray (result )
551585
552586 def write_eeprom (self , offset , num_bytes , write_buffer ):
553587 """
554- write eeprom specific bytes beginning from a random offset with size as num_bytes
555- and write_buffer as the required bytes. Tries up to 50 times total on every 0.1s.
588+ Write EEPROM specific bytes beginning from a random offset with size as num_bytes
589+ and write_buffer as the required bytes. Retries up to 50 times (every 0.1s) only if
590+ previous attempts failed due to I2C errors (errno.EIO).
591+
556592 Returns:
557- Boolean, true if the write succeeded and false if it did not succeed.
593+ Boolean, True if the write succeeded, False if it did not succeed.
558594 """
559595 if num_bytes != len (write_buffer ):
560596 logger .log_error ("Error mismatch between buffer length and number of bytes to be written" )
561597 return False
562598
563599 for attempt in range (MAX_ATTEMPTS ):
564- ret = self ._write_eeprom (offset , num_bytes , write_buffer )
600+ ret , err = self ._write_eeprom (offset , num_bytes , write_buffer )
565601 if ret :
566- logger .log_notice (
602+ logger .log_debug (
567603 f"EEPROM write success after attempt { attempt + 1 } /{ MAX_ATTEMPTS } "
568604 f"for sfp={ self .sdk_index } , offset={ offset } , size={ num_bytes } " )
569605 return True
570- logger .log_notice (
571- f"EEPROM write attempt { attempt + 1 } /{ MAX_ATTEMPTS } failed "
572- f"for sfp={ self .sdk_index } , offset={ offset } , size={ num_bytes } . "
573- f"Retrying in { RETRY_SLEEP_SEC :.1f} s..."
574- )
575- time .sleep (RETRY_SLEEP_SEC )
606+
607+ log_func = (logger .log_error if attempt + 1 > EEPROM_RETRY_ERR_THRESHOLD else logger .log_debug )
608+
609+ # Retry only on EIO (-5)
610+ if err == errno .EIO and attempt < MAX_ATTEMPTS - 1 :
611+ log_func (
612+ f"EEPROM write attempt { attempt + 1 } /{ MAX_ATTEMPTS } failed with I2C error "
613+ f"for sfp={ self .sdk_index } , offset={ offset } , size={ num_bytes } . "
614+ f"Retrying in { RETRY_SLEEP_SEC :.1f} s..." )
615+ time .sleep (RETRY_SLEEP_SEC )
616+ continue
617+
618+ # Non-I2C-error or last attempt → stop retrying
619+ if err is not None :
620+ log_func (
621+ f"EEPROM write failed (errno={ err } , { os .strerror (err )} ) "
622+ f"for sfp={ self .sdk_index } , offset={ offset } , size={ num_bytes } "
623+ f"after attempt { attempt + 1 } /{ MAX_ATTEMPTS } " )
624+ else :
625+ log_func (
626+ f"EEPROM write failed for sfp={ self .sdk_index } , offset={ offset } , size={ num_bytes } "
627+ f"after attempt { attempt + 1 } /{ MAX_ATTEMPTS } " )
628+ return False
576629
577630 logger .log_error (
578631 f"EEPROM write failed after { MAX_ATTEMPTS } attempts "
@@ -585,19 +638,23 @@ def _write_eeprom(self, offset, num_bytes, write_buffer):
585638 Single-attempt write: write eeprom specific bytes beginning from a random offset with size as num_bytes
586639 and write_buffer as the required bytes
587640 Returns:
588- Boolean, true if the write succeeded and false if it did not succeed.
641+ (True, None): On success.
642+ (False, errno): On failure with a specific OS error.
643+ (False, None): On failure without an associated errno (e.g., unexpected short write).
644+
589645 Example:
590646 mlxreg -d /dev/mst/mt52100_pciconf0 --reg_name MCIA --indexes slot_index=0,module=1,device_address=154,page_number=5,i2c_device_address=0x50,size=1,bank_number=0 --set dword[0]=0x01000000 -y
591647 """
592648 while num_bytes > 0 :
593649 page_num , page , page_offset = self ._get_page_and_page_offset (offset )
594650 if not page :
595- return False
651+ return False , None
596652
597653 try :
598654 if self ._is_write_protected (page_num , page_offset , num_bytes ):
599655 # write limited eeprom is not supported
600- raise IOError ('write limited bytes' )
656+ raise OSError (errno .EPERM , 'write limited bytes' )
657+
601658 with open (page , mode = 'r+b' , buffering = 0 ) as f :
602659 f .seek (page_offset )
603660 ret = f .write (write_buffer [0 :num_bytes ])
@@ -609,19 +666,29 @@ def _write_eeprom(self, offset, num_bytes, write_buffer):
609666 write_buffer = write_buffer [ret :num_bytes ]
610667 offset += ret
611668 else :
612- raise IOError (f'write return code = { ret } ' )
669+ logger .log_error (
670+ f"Unexpected short write (ret={ ret } < { num_bytes } ) "
671+ f"for sfp={ self .sdk_index } , page={ page } , page_offset={ page_offset } , offset={ offset } "
672+ )
673+ return False , None
613674 num_bytes -= ret
614675 if ctypes .get_errno () != 0 :
615- raise IOError (f'errno = { os .strerror (ctypes .get_errno ())} ' )
616- logger .log_debug ('write EEPROM sfp={}, page={}, page_offset={}, size={}, left={}, data={}' .format (self .sdk_index , page , page_offset , ret , num_bytes , written_buffer ))
617- except (OSError , IOError ) as e :
676+ return False , ctypes .get_errno ()
677+
678+ logger .log_debug (
679+ 'write EEPROM sfp={}, page={}, page_offset={}, size={}, left={}, data={}'
680+ .format (self .sdk_index , page , page_offset , ret , num_bytes , written_buffer )
681+ )
682+
683+ except OSError as e :
618684 data = '' .join ('{:02x}' .format (x ) for x in write_buffer )
619685 logger .log_error (
620686 f"Failed to write EEPROM: sfp={ self .sdk_index } , page={ page } , page_offset={ page_offset } , "
621687 f"size={ num_bytes } , offset={ offset } , data={ data } , error={ e } "
622688 )
623- return False
624- return True
689+ return False , e .errno
690+
691+ return True , None
625692
626693 def get_lpmode (self ):
627694 """
0 commit comments