290 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			290 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|  | #!/usr/bin/awk -f | ||
|  | 
 | ||
|  | # Authors: @esperlu, @artemyk, @gkuenning, @dumblob | ||
|  | 
 | ||
|  | # FIXME detect empty input file and issue a warning | ||
|  | 
 | ||
|  | function printerr( s ){ print s | "cat >&2" } | ||
|  | 
 | ||
|  | BEGIN { | ||
|  |   if( ARGC != 2 ){ | ||
|  |     printerr( \ | ||
|  |       "USAGE:\n"\ | ||
|  |       "       mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \ | ||
|  |       "       OR\n" \ | ||
|  |       "       mysql2sqlite dump_mysql.sql | sqlite3 sqlite.db\n" \ | ||
|  |       "\n" \ | ||
|  |       "NOTES:\n" \ | ||
|  |       "       Dash in filename is not supported, because dash (-) means stdin." ) | ||
|  |     no_END = 1 | ||
|  |     exit 1 | ||
|  |   } | ||
|  | 
 | ||
|  |   # Find INT_MAX supported by both this AWK (usually an ISO C signed int) | ||
|  |   #   and SQlite. | ||
|  |   # On non-8bit-based architectures, the additional bits are safely ignored. | ||
|  | 
 | ||
|  |   # 8bit (lower precision should not exist) | ||
|  |   s="127" | ||
|  |   # "63" + 0 avoids potential parser misbehavior | ||
|  |   if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 } | ||
|  |   # 16bit | ||
|  |   s="32767" | ||
|  |   if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 } | ||
|  |   # 32bit | ||
|  |   s="2147483647" | ||
|  |   if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 } | ||
|  |   # 64bit (as INTEGER in SQlite3) | ||
|  |   s="9223372036854775807" | ||
|  |   if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 } | ||
|  | #  # 128bit | ||
|  | #  s="170141183460469231731687303715884105728" | ||
|  | #  if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 } | ||
|  | #  # 256bit | ||
|  | #  s="57896044618658097711785492504343953926634992332820282019728792003956564819968" | ||
|  | #  if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 } | ||
|  | #  # 512bit | ||
|  | #  s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048" | ||
|  | #  if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 } | ||
|  | #  # 1024bit | ||
|  | #  s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608" | ||
|  | #  if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 } | ||
|  | #  # higher precision probably not needed | ||
|  | 
 | ||
|  |   FS=",$" | ||
|  |   print "PRAGMA synchronous = OFF;" | ||
|  |   print "PRAGMA journal_mode = MEMORY;" | ||
|  |   print "BEGIN TRANSACTION;" | ||
|  | } | ||
|  | 
 | ||
|  | # historically 3 spaces separate non-argument local variables | ||
|  | function bit_to_int( str_bit,   powtwo, i, res, bit, overflow ){ | ||
|  |   powtwo = 1 | ||
|  |   overflow = 0 | ||
|  |   # 011101 = 1*2^0 + 0*2^1 + 1*2^2 ... | ||
|  |   for( i = length( str_bit ); i > 0; --i ){ | ||
|  |     bit = substr( str_bit, i, 1 ) | ||
|  |     if( overflow || ( bit == 1 && res > INT_MAX_HALF ) ){ | ||
|  |       printerr( \ | ||
|  |         NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." ) | ||
|  |       break | ||
|  |     } | ||
|  |     res = res + bit * powtwo | ||
|  |     # no warning here as it might be the last iteration | ||
|  |     if( powtwo > INT_MAX_HALF ){ overflow = 1; continue } | ||
|  |     powtwo = powtwo * 2 | ||
|  |   } | ||
|  |   return res | ||
|  | } | ||
|  | 
 | ||
|  | # CREATE TRIGGER statements have funny commenting. Remember we are in trigger. | ||
|  | /^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ { | ||
|  |   gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" ) | ||
|  |   print | ||
|  |   inTrigger = 1 | ||
|  |   next | ||
|  | } | ||
|  | # The end of CREATE TRIGGER has a stray comment terminator | ||
|  | /(END|end) \*\/;;/ { gsub( /\*\//, "" ); print; inTrigger = 0; next } | ||
|  | # The rest of triggers just get passed through | ||
|  | inTrigger != 0 { print; next } | ||
|  | 
 | ||
|  | # CREATE VIEW looks like a TABLE in comments | ||
|  | /^\/\*.*(CREATE.*TABLE|create.*table)/ { | ||
|  |   inView = 1 | ||
|  |   next | ||
|  | } | ||
|  | # end of CREATE VIEW | ||
|  | /^(\).*(ENGINE|engine).*\*\/;)/ { | ||
|  |   inView = 0 | ||
|  |   next | ||
|  | } | ||
|  | # content of CREATE VIEW | ||
|  | inView != 0 { next } | ||
|  | 
 | ||
|  | # skip comments | ||
|  | /^\/\*/ { next } | ||
|  | 
 | ||
|  | # skip PARTITION statements | ||
|  | /^ *[(]?(PARTITION|partition) +[^ ]+/ { next } | ||
|  | 
 | ||
|  | # print all INSERT lines | ||
|  | ( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert|REPLACE|replace)/ { | ||
|  |   prev = "" | ||
|  | 
 | ||
|  |   # first replace \\ by \_ that mysqldump never generates to deal with | ||
|  |   # sequnces like \\n that should be translated into \n, not \<LF>. | ||
|  |   # After we convert all escapes we replace \_ by backslashes. | ||
|  |   gsub( /\\\\/, "\\_" ) | ||
|  | 
 | ||
|  |   # single quotes are escaped by another single quote | ||
|  |   gsub( /\\'/, "''" ) | ||
|  |   gsub( /\\n/, "\n" ) | ||
|  |   gsub( /\\r/, "\r" ) | ||
|  |   gsub( /\\"/, "\"" ) | ||
|  |   gsub( /\\\032/, "\032" )  # substitute char | ||
|  | 
 | ||
|  |   gsub( /\\_/, "\\" ) | ||
|  | 
 | ||
|  |   # sqlite3 is limited to 16 significant digits of precision | ||
|  |   while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){ | ||
|  |     hexIssue = 1 | ||
|  |     sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 ) | ||
|  |   } | ||
|  |   if( hexIssue ){ | ||
|  |     printerr( \ | ||
|  |       NR ": WARN Hex number trimmed (length longer than 16 chars)." ) | ||
|  |     hexIssue = 0 | ||
|  |   } | ||
|  |   print | ||
|  |   next | ||
|  | } | ||
|  | 
 | ||
|  | # CREATE DATABASE is not supported | ||
|  | /^(CREATE DATABASE|create database)/ { next } | ||
|  | 
 | ||
|  | # print the CREATE line as is and capture the table name | ||
|  | /^(CREATE|create)/ { | ||
|  |   if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){ | ||
|  |     caseIssue = 1 | ||
|  |     printerr( \ | ||
|  |       NR ": WARN Potential case sensitivity issues with table/column naming\n" \ | ||
|  |       "          (see INFO at the end)." ) | ||
|  |   } | ||
|  |   if( match( $0, /`[^`]+/ ) ){ | ||
|  |     tableName = substr( $0, RSTART+1, RLENGTH-1 ) | ||
|  |   } | ||
|  |   aInc = 0 | ||
|  |   prev = "" | ||
|  |   firstInTable = 1 | ||
|  |   print | ||
|  |   next | ||
|  | } | ||
|  | 
 | ||
|  | # Replace `FULLTEXT KEY` (probably other `XXXXX KEY`) | ||
|  | /^  (FULLTEXT KEY|fulltext key)/ { gsub( /[A-Za-z ]+(KEY|key)/, "  KEY" ) } | ||
|  | 
 | ||
|  | # Get rid of field lengths in KEY lines | ||
|  | / (PRIMARY |primary )?(KEY|key)/ { gsub( /\([0-9]+\)/, "" ) } | ||
|  | 
 | ||
|  | aInc == 1 && /PRIMARY KEY|primary key/ { next } | ||
|  | 
 | ||
|  | # Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY | ||
|  | / (COLLATE|collate) [a-z0-9_]*/ { gsub( /(COLLATE|collate) [a-z0-9_]*/, "COLLATE BINARY" ) } | ||
|  | 
 | ||
|  | # Print all fields definition lines except the `KEY` lines. | ||
|  | /^  / && !/^(  (KEY|key)|\);)/ { | ||
|  |   if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){ | ||
|  |     aInc = 1 | ||
|  |     gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" ) | ||
|  |   } | ||
|  |   gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " ) | ||
|  |   gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" ) | ||
|  |   # FIXME | ||
|  |   #   CREATE TRIGGER [UpdateLastTime] | ||
|  |   #   AFTER UPDATE | ||
|  |   #   ON Package | ||
|  |   #   FOR EACH ROW | ||
|  |   #   BEGIN | ||
|  |   #   UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId; | ||
|  |   #   END | ||
|  |   gsub( /(ON|on) (UPDATE|update) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "" ) | ||
|  |   gsub( /(DEFAULT|default) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "DEFAULT current_timestamp") | ||
|  |   gsub( /(COLLATE|collate) [^ ]+ /, "" ) | ||
|  |   gsub( /(ENUM|enum)[^)]+\)/, "text " ) | ||
|  |   gsub( /(SET|set)\([^)]+\)/, "text " ) | ||
|  |   gsub( /UNSIGNED|unsigned/, "" ) | ||
|  |   gsub( /_utf8mb3/, "" ) | ||
|  |   gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" ) | ||
|  |   gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" ) | ||
|  |   ere_bit_field = "[bB]'[10]+'" | ||
|  |   if( match($0, ere_bit_field) ){ | ||
|  |     sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) ) | ||
|  |   } | ||
|  | 
 | ||
|  |   # remove USING BTREE and other suffixes for USING, for example: "UNIQUE KEY | ||
|  |   # `hostname_domain` (`hostname`,`domain`) USING BTREE," | ||
|  |   gsub( / USING [^, ]+/, "" ) | ||
|  | 
 | ||
|  |   # field comments are not supported | ||
|  |   gsub( / (COMMENT|comment).+$/, "" ) | ||
|  |   # Get commas off end of line | ||
|  |   gsub( /,.?$/, "" ) | ||
|  |   if( prev ){ | ||
|  |     if( firstInTable ){ | ||
|  |       print prev | ||
|  |       firstInTable = 0 | ||
|  |     } | ||
|  |     else { | ||
|  |       print "," prev | ||
|  |     } | ||
|  |   } | ||
|  |   else { | ||
|  |     # FIXME check if this is correct in all cases | ||
|  |     if( match( $1, | ||
|  |         /(CONSTRAINT|constraint) ["].*["] (FOREIGN KEY|foreign key)/ ) ){ | ||
|  |       print "," | ||
|  |     } | ||
|  |   } | ||
|  |   prev = $1 | ||
|  | } | ||
|  | 
 | ||
|  | / ENGINE| engine/ { | ||
|  |   if( prev ){ | ||
|  |     if( firstInTable ){ | ||
|  |       print prev | ||
|  |       firstInTable = 0 | ||
|  |     } | ||
|  |     else { | ||
|  |       print "," prev | ||
|  |     } | ||
|  |   } | ||
|  |   prev="" | ||
|  |   print ");" | ||
|  |   next | ||
|  | } | ||
|  | # `KEY` lines are extracted from the `CREATE` block and stored in array for later print | ||
|  | # in a separate `CREATE KEY` command. The index name is prefixed by the table name to | ||
|  | # avoid a sqlite error for duplicate index name. | ||
|  | /^(  (KEY|key)|\);)/ { | ||
|  |   if( prev ){ | ||
|  |     if( firstInTable ){ | ||
|  |       print prev | ||
|  |       firstInTable = 0 | ||
|  |     } | ||
|  |     else { | ||
|  |       print "," prev | ||
|  |     } | ||
|  |   } | ||
|  |   prev = "" | ||
|  |   if( $0 == ");" ){ | ||
|  |     print | ||
|  |   } | ||
|  |   else { | ||
|  |     if( match( $0, /`[^`]+/ ) ){ | ||
|  |       indexName = substr( $0, RSTART+1, RLENGTH-1 ) | ||
|  |     } | ||
|  |     if( match( $0, /\([^()]+/ ) ){ | ||
|  |       indexKey = substr( $0, RSTART+1, RLENGTH-1 ) | ||
|  |     } | ||
|  |     # idx_ prefix to avoid name clashes (they really happen!) | ||
|  |     key[tableName] = key[tableName] "CREATE INDEX \"idx_" \ | ||
|  |        tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n" | ||
|  |   } | ||
|  | } | ||
|  | 
 | ||
|  | END { | ||
|  |   if( no_END ){ exit 1} | ||
|  |   # print all KEY creation lines. | ||
|  |   for( table in key ){ printf key[table] } | ||
|  | 
 | ||
|  |   print "END TRANSACTION;" | ||
|  | 
 | ||
|  |   if( caseIssue ){ | ||
|  |     printerr( \ | ||
|  |       "INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \ | ||
|  |       "     or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \ | ||
|  |       "     identifiers. Thus expect errors like \"table T has no column named F\".") | ||
|  |   } | ||
|  | } |