1414 * limitations under the License.
1515 */
1616
17+ /*
18+ The MIT License (MIT)
19+
20+ Copyright (c) 2017 Facebook Inc.
21+ Copyright (c) 2017 Georgia Institute of Technology
22+ Copyright 2019 Google LLC
23+
24+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
25+ associated documentation files (the "Software"), to deal in the Software without restriction,
26+ including without limitation the rights to use, copy, modify, merge, publish, distribute,
27+ sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
28+ furnished to do so, subject to the following conditions:
29+
30+ The above copyright notice and this permission notice shall be included in all copies or
31+ substantial portions of the Software.
32+
33+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
34+ NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
35+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
36+ OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
37+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38+ */
39+
1740#ifndef ONERT_MICRO_EXECUTE_PAL_DEQUANTIZE_COMMON_H
1841#define ONERT_MICRO_EXECUTE_PAL_DEQUANTIZE_COMMON_H
1942
2346#include " PALUtils.h"
2447
2548#include < cmath>
49+ #include < bit>
50+
51+ namespace
52+ {
53+
/// Note: the code below is derived from the FP16 library (https://github.com/Maratyszcza/FP16),
/// which is distributed under the MIT License.
55+
/*
 * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to
 * a 32-bit floating-point number in IEEE single-precision format, in bit representation.
 *
 * @param h  Bit pattern of the half-precision value (1 sign, 5 exponent, 10 mantissa bits).
 * @return   Bit pattern of the single-precision value with the same numeric value. Zeros keep
 *           their sign, denormals are renormalized, infinities stay infinities and NaNs stay
 *           NaNs (the mantissa payload is shifted into the high mantissa bits).
 *
 * @note The implementation doesn't use any floating-point operations.
 * @note The two masks below rely on right-shifting a negative int32_t being an arithmetic
 *       (sign-extending) shift, as the original code did.
 */
uint32_t fp16_ieee_to_fp32_bits(uint16_t h)
{
  /*
   * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the
   * 32-bit word:
   *      +---+-----+------------+-------------------+
   *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
   *      +---+-----+------------+-------------------+
   * Bits  31  26-30    16-25            0-15
   *
   * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
   */
  const uint32_t w = (uint32_t)h << 16;
  /*
   * Extract the sign of the input number into the high bit of the 32-bit word:
   *
   *      +---+----------------------------------+
   *      | S |0000000 00000000 00000000 00000000|
   *      +---+----------------------------------+
   * Bits  31                 0-30
   */
  const uint32_t sign = w & UINT32_C(0x80000000);
  /*
   * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit
   * word:
   *
   *      +---+-----+------------+-------------------+
   *      | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
   *      +---+-----+------------+-------------------+
   * Bits  31  26-30    16-25            0-15
   */
  const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF);
  /*
   * Renorm shift is the number of bits to shift mantissa left to make the half-precision number
   * normalized. If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit
   * exponent) equal one. In this case renorm_shift == 0. If the number is denormalized,
   * renorm_shift > 0. Note that if we shift denormalized nonsign by renorm_shift, the unit bit of
   * mantissa will shift into exponent, turning the biased exponent into 1, and making mantissa
   * normalized (i.e. without leading 1).
   *
   * The bit scan is performed on (nonsign | 1): counting leading zeros of 0 is undefined for
   * __builtin_clz, and _BitScanReverse leaves its output unset for a 0 input. Any non-zero
   * nonsign has its highest set bit at position 16 or above, so forcing bit 0 on never changes
   * the count for it; for a zero input it merely yields a well-defined shift whose result is
   * discarded by zero_mask below.
   */
  const uint32_t scan_word = nonsign | UINT32_C(1);
#ifdef _MSC_VER
  unsigned long nonsign_bsr;
  _BitScanReverse(&nonsign_bsr, (unsigned long)scan_word);
  uint32_t renorm_shift = (uint32_t)nonsign_bsr ^ 31;
#else
  uint32_t renorm_shift = (uint32_t)__builtin_clz(scan_word);
#endif
  renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0;
  /*
   * Iff the half-precision number has an all-ones exponent field (0x1F), the addition overflows
   * it into bit 31, and the subsequent shift turns the high 9 bits into 1. Thus
   *   inf_nan_mask ==
   *                   0x7F800000 if the half-precision number was NaN or infinity
   *                   0x00000000 otherwise
   */
  const int32_t inf_nan_mask = ((int32_t)(nonsign + 0x04000000) >> 8) & INT32_C(0x7F800000);
  /*
   * Iff nonsign is 0, the subtraction wraps around to 0xFFFFFFFF, turning bit 31 into 1.
   * Otherwise, bit 31 remains 0. The signed shift right by 31 broadcasts bit 31 into all bits of
   * the zero_mask. Thus
   *   zero_mask ==
   *                0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h)
   *                0x00000000 otherwise
   */
  const int32_t zero_mask = (int32_t)(nonsign - 1) >> 31;
  /*
   * 1. Shift nonsign left by renorm_shift to normalize it (if the input was denormal)
   * 2. Shift nonsign right by 3 so the exponent (5 bits originally) becomes an 8-bit field and
   *    10-bit mantissa shifts into the 10 high bits of the 23-bit mantissa of IEEE
   *    single-precision number.
   * 3. Add 0x70 to the exponent (starting at bit 23) to compensate for the difference in exponent
   *    bias (0x7F for single-precision number less 0xF for half-precision number).
   * 4. Subtract renorm_shift from the exponent (starting at bit 23) to account for
   *    renormalization. As renorm_shift is less than 0x70, this can be combined with step 3.
   * 5. Binary OR with inf_nan_mask to turn the exponent into 0xFF if the input was NaN or
   *    infinity.
   * 6. Binary ANDNOT with zero_mask to turn the mantissa and exponent into zero if the input was
   *    zero.
   * 7. Combine with the sign of the input number.
   */
  return sign | ((((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) | inf_nan_mask) &
                 ~zero_mask);
}
142+
143+ } // namespace
26144
27145namespace onert_micro
28146{
@@ -46,6 +164,20 @@ OMStatus Dequantize(const core::QuantizationParams op_params, const uint32_t fla
46164 }
47165 return Ok;
48166}
167+
168+ OMStatus DequantizeF16toF32 (const uint32_t flat_size, const uint16_t *input_data,
169+ float *output_data)
170+ {
171+ for (uint32_t i = 0 ; i < flat_size; i++)
172+ {
173+ uint32_t f32_in_bits = (fp16_ieee_to_fp32_bits (input_data[i]));
174+ float val;
175+ memcpy (&val, &f32_in_bits, sizeof (float ));
176+ output_data[i] = val;
177+ }
178+ return Ok;
179+ }
180+
49181} // namespace pal
50182} // namespace execute
51183} // namespace onert_micro
0 commit comments