From 871480933a1c28f8a9fed4c4d34d06c439a7a422 Mon Sep 17 00:00:00 2001 From: Srikant Patnaik Date: Sun, 11 Jan 2015 12:28:04 +0530 Subject: Moved, renamed, and deleted files The original directory structure was scattered and unorganized. Changes are basically to make it look like kernel structure. --- Documentation/acpi/apei/einj.txt | 107 ++++++++++++++++++++++ Documentation/acpi/apei/output_format.txt | 147 ++++++++++++++++++++++++++++++ 2 files changed, 254 insertions(+) create mode 100644 Documentation/acpi/apei/einj.txt create mode 100644 Documentation/acpi/apei/output_format.txt (limited to 'Documentation/acpi/apei') diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt new file mode 100644 index 00000000..e20b6daa --- /dev/null +++ b/Documentation/acpi/apei/einj.txt @@ -0,0 +1,107 @@ + APEI Error INJection + ~~~~~~~~~~~~~~~~~~~~ + +EINJ provides a hardware error injection mechanism +It is very useful for debugging and testing of other APEI and RAS features. + +To use EINJ, make sure the following are enabled in your kernel +configuration: + +CONFIG_DEBUG_FS +CONFIG_ACPI_APEI +CONFIG_ACPI_APEI_EINJ + +The user interface of EINJ is debug file system, under the +directory apei/einj. The following files are provided. + +- available_error_type + Reading this file returns the error injection capability of the + platform, that is, which error types are supported. The error type + definition is as follow, the left field is the error type value, the + right field is error description. + + 0x00000001 Processor Correctable + 0x00000002 Processor Uncorrectable non-fatal + 0x00000004 Processor Uncorrectable fatal + 0x00000008 Memory Correctable + 0x00000010 Memory Uncorrectable non-fatal + 0x00000020 Memory Uncorrectable fatal + 0x00000040 PCI Express Correctable + 0x00000080 PCI Express Uncorrectable fatal + 0x00000100 PCI Express Uncorrectable non-fatal + 0x00000200 Platform Correctable + 0x00000400 Platform Uncorrectable non-fatal + 0x00000800 Platform Uncorrectable fatal + + The format of file contents are as above, except there are only the + available error type lines. + +- error_type + This file is used to set the error type value. The error type value + is defined in "available_error_type" description. + +- error_inject + Write any integer to this file to trigger the error + injection. Before this, please specify all necessary error + parameters. + +- param1 + This file is used to set the first error parameter value. Effect of + parameter depends on error_type specified. + +- param2 + This file is used to set the second error parameter value. Effect of + parameter depends on error_type specified. + +- notrigger + The EINJ mechanism is a two step process. First inject the error, then + perform some actions to trigger it. Setting "notrigger" to 1 skips the + trigger phase, which *may* allow the user to cause the error in some other + context by a simple access to the cpu, memory location, or device that is + the target of the error injection. Whether this actually works depends + on what operations the BIOS actually includes in the trigger phase. + +BIOS versions based in the ACPI 4.0 specification have limited options +to control where the errors are injected. Your BIOS may support an +extension (enabled with the param_extension=1 module parameter, or +boot command line einj.param_extension=1). This allows the address +and mask for memory injections to be specified by the param1 and +param2 files in apei/einj. + +BIOS versions using the ACPI 5.0 specification have more control over +the target of the injection. For processor related errors (type 0x1, +0x2 and 0x4) the APICID of the target should be provided using the +param1 file in apei/einj. For memory errors (type 0x8, 0x10 and 0x20) +the address is set using param1 with a mask in param2 (0x0 is equivalent +to all ones). For PCI express errors (type 0x40, 0x80 and 0x100) the +segment, bus, device and function are specified using param1: + + 31 24 23 16 15 11 10 8 7 0 + +-------------------------------------------------+ + | segment | bus | device | function | reserved | + +-------------------------------------------------+ + +An ACPI 5.0 BIOS may also allow vendor specific errors to be injected. +In this case a file named vendor will contain identifying information +from the BIOS that hopefully will allow an application wishing to use +the vendor specific extension to tell that they are running on a BIOS +that supports it. All vendor extensions have the 0x80000000 bit set in +error_type. A file vendor_flags controls the interpretation of param1 +and param2 (1 = PROCESSOR, 2 = MEMORY, 4 = PCI). See your BIOS vendor +documentation for details (and expect changes to this API if vendors +creativity in using this feature expands beyond our expectations). + +Example: +# cd /sys/kernel/debug/apei/einj +# cat available_error_type # See which errors can be injected +0x00000002 Processor Uncorrectable non-fatal +0x00000008 Memory Correctable +0x00000010 Memory Uncorrectable non-fatal +# echo 0x12345000 > param1 # Set memory address for injection +# echo 0xfffffffffffff000 > param2 # Mask - anywhere in this page +# echo 0x8 > error_type # Choose correctable memory error +# echo 1 > error_inject # Inject now + + +For more information about EINJ, please refer to ACPI specification +version 4.0, section 17.5 and ACPI 5.0, section 18.6. diff --git a/Documentation/acpi/apei/output_format.txt b/Documentation/acpi/apei/output_format.txt new file mode 100644 index 00000000..0c49c197 --- /dev/null +++ b/Documentation/acpi/apei/output_format.txt @@ -0,0 +1,147 @@ + APEI output format + ~~~~~~~~~~~~~~~~~~ + +APEI uses printk as hardware error reporting interface, the output +format is as follow. + + := +APEI generic hardware error status +severity: , +section: , severity: , +flags: +
+fru_id: +fru_text: +section_type:
+
+ +* := recoverable | fatal | corrected | info + +
# := +[primary][, containment warning][, reset][, threshold exceeded]\ +[, resource not accessible][, latent error] + +
:= generic processor error | memory error | \ +PCIe error | unknown, + +
:= + | | \ + | + + := +[processor_type: , ] +[processor_isa: , ] +[error_type: +] +[operation: , ] +[flags: +] +[level: ] +[version_info: ] +[processor_id: ] +[target_address: ] +[requestor_id: ] +[responder_id: ] +[IP: ] + +* := IA32/X64 | IA64 + +* := IA32 | IA64 | X64 + +# := +[cache error][, TLB error][, bus error][, micro-architectural error] + +* := unknown or generic | data read | data write | \ +instruction execution + +# := +[restartable][, precise IP][, overflow][, corrected] + + := +[error_status: ] +[physical_address: ] +[physical_address_mask: ] +[node: ] +[card: ] +[module: ] +[bank: ] +[device: ] +[row: ] +[column: ] +[bit_position: ] +[requestor_id: ] +[responder_id: ] +[target_id: ] +[error_type: , ] + +* := +unknown | no error | single-bit ECC | multi-bit ECC | \ +single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \ +target abort | parity error | watchdog timeout | invalid address | \ +mirror Broken | memory sparing | scrub corrected error | \ +scrub uncorrected error + + := +[port_type: , ] +[version: .] +[command: , status: ] +[device_id: ::. +slot: +secondary_bus: +vendor_id: , device_id: +class_code: ] +[serial number: , ] +[bridge: secondary_status: , control: ] +[aer_status: , aer_mask: + +[aer_uncor_severity: ] +aer_layer=, aer_agent= +aer_tlp_header: ] + +* := PCIe end point | legacy PCI end point | \ +unknown | unknown | root port | upstream switch port | \ +downstream switch port | PCIe to PCI/PCI-X bridge | \ +PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \ +root complex event collector + +if section severity is fatal or recoverable +# := +unknown | unknown | unknown | unknown | Data Link Protocol | \ +unknown | unknown | unknown | unknown | unknown | unknown | unknown | \ +Poisoned TLP | Flow Control Protocol | Completion Timeout | \ +Completer Abort | Unexpected Completion | Receiver Overflow | \ +Malformed TLP | ECRC | Unsupported Request +else +# := +Receiver Error | unknown | unknown | unknown | unknown | unknown | \ +Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \ +Replay Timer Timeout | Advisory Non-Fatal +fi + + := +Physical Layer | Data Link Layer | Transaction Layer + + := +Receiver ID | Requester ID | Completer ID | Transmitter ID + +Where, [] designate corresponding content is optional + +All description with * has the following format: + +field: , + +Where value of should be the position of "string" in description. Otherwise, will be "unknown". + +All description with # has the following format: + +field: + + +Where each string in corresponding to one set bit of +. The bit position is the position of "string" in description. + +For more detailed explanation of every field, please refer to UEFI +specification version 2.3 or later, section Appendix N: Common +Platform Error Record. -- cgit