You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			311 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			311 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
from __future__ import annotations
 | 
						|
 | 
						|
from typing import Final
 | 
						|
 | 
						|
# 32-byte signature, laid out as four rows of eight bytes each.
# NOTE(review): presumably compared against the leading bytes of a file to
# recognise the format - confirm against the reader that consumes it.
magic: Final = (
    b"\x00\x00\x00\x00\x00\x00\x00\x00"
    b"\x00\x00\x00\x00\xc2\xea\x81\x60"
    b"\xb3\x14\x11\xcf\xbd\x92\x08\x00"
    b"\x09\xc7\x31\x8c\x18\x1f\x10\x11"
)
 | 
						|
 | 
						|
# Two alignment probes. Each has an offset, a length and a value; the
# checker values are single-byte ``bytes`` objects.
# NOTE(review): presumably the byte read at ``*_offset`` is compared with the
# checker value and ``align_*_value`` is the extra padding applied on a
# match - confirm against the reader.
align_1_checker_value: Final = b"3"
align_1_offset: Final = 32
align_1_length: Final = 1
align_1_value: Final = 4

# NOTE(review): by its name this is the checker for the second probe
# (64-bit layout) - confirm against the reader.
u64_byte_checker_value: Final = b"3"
align_2_offset: Final = 35
align_2_length: Final = 1
align_2_value: Final = 4
 | 
						|
# Location table for header fields: every field is described by an
# ``<name>_offset`` / ``<name>_length`` pair.
# NOTE(review): units are presumably bytes from the start of the header -
# confirm against the reader.

# Single-byte flags.
endianness_offset: Final = 37
endianness_length: Final = 1
platform_offset: Final = 39
platform_length: Final = 1
encoding_offset: Final = 70
encoding_length: Final = 1

# Dataset name and file type.
dataset_offset: Final = 92
dataset_length: Final = 64
file_type_offset: Final = 156
file_type_length: Final = 8

# Creation / modification timestamps.
date_created_offset: Final = 164
date_created_length: Final = 8
date_modified_offset: Final = 172
date_modified_length: Final = 8

# Header and page geometry.
header_size_offset: Final = 196
header_size_length: Final = 4
page_size_offset: Final = 200
page_size_length: Final = 4
page_count_offset: Final = 204
page_count_length: Final = 4

# Producer information (SAS release, server type, operating system).
sas_release_offset: Final = 216
sas_release_length: Final = 8
sas_server_type_offset: Final = 224
sas_server_type_length: Final = 16
os_version_number_offset: Final = 240
os_version_number_length: Final = 16
os_maker_offset: Final = 256
os_maker_length: Final = 16
os_name_offset: Final = 272
os_name_length: Final = 16
 | 
						|
# Page-level layout constants. Values suffixed ``_x86`` / ``_x64`` come in
# pairs, the ``_x64`` one being twice the ``_x86`` one.
# NOTE(review): presumably selected by the 32-/64-bit flavour of the file -
# confirm against the reader.
page_bit_offset_x86: Final = 16
page_bit_offset_x64: Final = 32
subheader_pointer_length_x86: Final = 12
subheader_pointer_length_x64: Final = 24

# Offset/length pairs for three consecutive 2-unit fields (0-2, 2-4, 4-6).
# NOTE(review): presumably measured in bytes relative to the page bit
# offset - confirm against the reader.
page_type_offset: Final = 0
page_type_length: Final = 2
block_count_offset: Final = 2
block_count_length: Final = 2
subheader_count_offset: Final = 4
subheader_count_length: Final = 2
 | 
						|
# Bit masks for the page type value.
page_type_mask: Final = 0x0F00
# Keep "page_comp_type" bits
page_type_mask2: Final = 0xF000 | page_type_mask  # evaluates to 0xFF00

# Known page type codes.
page_meta_type: Final = 0x0000
page_data_type: Final = 0x0100
page_mix_type: Final = 0x0200
page_amd_type: Final = 0x0400
page_meta2_type: Final = 0x4000
page_comp_type: Final = 0x9000

# Both codes that identify a "meta" page, grouped for membership tests.
page_meta_types: Final = [page_meta_type, page_meta2_type]
 | 
						|
# Subheader-related constants.
# NOTE(review): presumably the pointer table starts this many bytes past the
# page bit offset - confirm against the reader.
subheader_pointers_offset: Final = 8

# Identifier / type codes used to classify subheaders.
truncated_subheader_id: Final = 1
compressed_subheader_id: Final = 4
compressed_subheader_type: Final = 1

text_block_size_length: Final = 2

# Multipliers used to locate fields.
# NOTE(review): presumably multiplied by the integer width of the file
# (4 or 8) to obtain an offset - confirm against the reader.
row_length_offset_multiplier: Final = 5
row_count_offset_multiplier: Final = 6
col_count_p1_multiplier: Final = 9
col_count_p2_multiplier: Final = 10
row_count_on_mix_page_offset_multiplier: Final = 15
 | 
						|
# Column metadata layout: ``<field>_offset`` / ``<field>_length`` pairs for
# the column name, data, type, format and label entries.
# NOTE(review): units are presumably bytes within the owning subheader -
# confirm against the reader.

# Column name pointer entries.
column_name_pointer_length: Final = 8
column_name_text_subheader_offset: Final = 0
column_name_text_subheader_length: Final = 2
column_name_offset_offset: Final = 2
column_name_offset_length: Final = 2
column_name_length_offset: Final = 4
column_name_length_length: Final = 2

# Column data position, width and type.
# NOTE(review): column_data_offset_offset and column_data_length_offset are
# both 8 as written - confirm this is intentional.
column_data_offset_offset: Final = 8
column_data_length_offset: Final = 8
column_data_length_length: Final = 4
column_type_offset: Final = 14
column_type_length: Final = 1

# Column format entries.
column_format_text_subheader_index_offset: Final = 22
column_format_text_subheader_index_length: Final = 2
column_format_offset_offset: Final = 24
column_format_offset_length: Final = 2
column_format_length_offset: Final = 26
column_format_length_length: Final = 2

# Column label entries.
column_label_text_subheader_index_offset: Final = 28
column_label_text_subheader_index_length: Final = 2
column_label_offset_offset: Final = 30
column_label_offset_length: Final = 2
column_label_length_offset: Final = 32
column_label_length_length: Final = 2
 | 
						|
# Eight-byte literals naming the two compression schemes.
rle_compression: Final = b"SASYZCRL"
rdc_compression: Final = b"SASYZCR2"

# Both literals collected in one list so callers can iterate over them.
compression_literals: Final = [rle_compression, rdc_compression]
 | 
						|
 | 
						|
# Incomplete list of encodings, using SAS nomenclature:
# https://support.sas.com/documentation/onlinedoc/dfdmstudio/2.6/dmpdmsug/Content/dfU_Encodings_SAS.html
# corresponding to the Python documentation of standard encodings
# https://docs.python.org/3/library/codecs.html#standard-encodings
#
# Maps an integer encoding code to a Python codec name. Codes with no known
# counterpart are left as commented-out placeholders, so looking them up
# raises KeyError.
encoding_names: Final = {
    20: "utf-8",
    29: "latin1",
    30: "latin2",
    31: "latin3",
    32: "latin4",
    33: "cyrillic",
    34: "arabic",
    35: "greek",
    36: "hebrew",
    37: "latin5",
    38: "latin6",
    39: "cp874",
    40: "latin9",
    41: "cp437",
    42: "cp850",
    43: "cp852",
    44: "cp857",
    45: "cp858",
    46: "cp862",
    47: "cp864",
    48: "cp865",
    49: "cp866",
    50: "cp869",
    51: "cp874",
    # 52: "",  # not found
    # 53: "",  # not found
    # 54: "",  # not found
    55: "cp720",
    56: "cp737",
    57: "cp775",
    58: "cp860",
    59: "cp863",
    60: "cp1250",
    61: "cp1251",
    62: "cp1252",
    63: "cp1253",
    64: "cp1254",
    65: "cp1255",
    66: "cp1256",
    67: "cp1257",
    68: "cp1258",
    118: "cp950",
    # 119: "",  # not found
    123: "big5",
    125: "gb2312",
    126: "cp936",
    134: "euc_jp",
    136: "cp932",
    138: "shift_jis",
    140: "euc-kr",
    141: "cp949",
    227: "latin8",
    # 228: "", # not found
    # 229: ""  # not found
}
 | 
						|
 | 
						|
 | 
						|
class SASIndex:
    """Namespace of integer codes, one per subheader kind.

    The codes run consecutively from 0 to 8 and are plain ``int`` class
    attributes; the class is never meant to be instantiated.
    """

    row_size_index: Final = 0
    column_size_index: Final = 1
    subheader_counts_index: Final = 2
    column_text_index: Final = 3
    column_name_index: Final = 4
    column_attributes_index: Final = 5
    format_and_label_index: Final = 6
    column_list_index: Final = 7
    data_subheader_index: Final = 8
 | 
						|
 | 
						|
 | 
						|
# Maps a raw subheader signature (4 or 8 bytes) to its ``SASIndex`` code.
# Most kinds are listed with several byte patterns (4-byte and 8-byte forms,
# in both byte orders where they differ). There is no entry for
# ``SASIndex.data_subheader_index``.
subheader_signature_to_index: Final = {
    # row size
    b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
    b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
    b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index,
    b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index,
    # column size
    b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
    b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
    b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index,
    b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index,
    # subheader counts
    b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index,
    b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
    b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
    # column text
    b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index,
    b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
    b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
    # column name
    b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
    # column attributes
    b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index,
    b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
    b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
    # format and label
    b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index,
    b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
    b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
    # column list
    b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index,
    b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
    b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
}
 | 
						|
 | 
						|
 | 
						|
# List of frequently used SAS date and datetime formats
# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm
# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java
#
# Format names are upper-case strings held in an immutable tuple.
sas_date_formats: Final = (
    "DATE",
    "DAY",
    "DDMMYY",
    "DOWNAME",
    "JULDAY",
    "JULIAN",
    "MMDDYY",
    "MMYY",
    "MMYYC",
    "MMYYD",
    "MMYYP",
    "MMYYS",
    "MMYYN",
    "MONNAME",
    "MONTH",
    "MONYY",
    "QTR",
    "QTRR",
    "NENGO",
    "WEEKDATE",
    "WEEKDATX",
    "WEEKDAY",
    "WEEKV",
    "WORDDATE",
    "WORDDATX",
    "YEAR",
    "YYMM",
    "YYMMC",
    "YYMMD",
    "YYMMP",
    "YYMMS",
    "YYMMN",
    "YYMON",
    "YYMMDD",
    "YYQ",
    "YYQC",
    "YYQD",
    "YYQP",
    "YYQS",
    "YYQN",
    "YYQR",
    "YYQRC",
    "YYQRD",
    "YYQRP",
    "YYQRS",
    "YYQRN",
    "YYMMDDP",
    "YYMMDDC",
    "E8601DA",
    "YYMMDDN",
    "MMDDYYC",
    "MMDDYYS",
    "MMDDYYD",
    "YYMMDDS",
    "B8601DA",
    "DDMMYYN",
    "YYMMDDD",
    "DDMMYYB",
    "DDMMYYP",
    "MMDDYYP",
    "YYMMDDB",
    "MMDDYYN",
    "DDMMYYC",
    "DDMMYYD",
    "DDMMYYS",
    "MINGUO",
)
 | 
						|
 | 
						|
# SAS datetime format names, as upper-case strings in an immutable tuple.
# Each name is listed once: the earlier revision repeated "DTMONYY" and
# "DTWKDATX", which had no effect on membership tests.
sas_datetime_formats: Final = (
    "DATETIME",
    "DTWKDATX",
    "B8601DN",
    "B8601DT",
    "B8601DX",
    "B8601DZ",
    "B8601LX",
    "E8601DN",
    "E8601DT",
    "E8601DX",
    "E8601DZ",
    "E8601LX",
    "DATEAMPM",
    "DTDATE",
    "DTMONYY",
    "DTYEAR",
    "TOD",
    "MDYAMPM",
)
 |