File tree Expand file tree Collapse file tree 3 files changed +116
-0
lines changed Expand file tree Collapse file tree 3 files changed +116
-0
lines changed Original file line number Diff line number Diff line change @@ -330,6 +330,7 @@ class Language(str, Enum):
330330 ELIXIR = "elixir"
331331 POWERSHELL = "powershell"
332332 VISUALBASIC6 = "visualbasic6"
333+ MYSQL = "mysql"
333334
334335
335336@dataclass (frozen = True )
Original file line number Diff line number Diff line change @@ -760,6 +760,82 @@ def get_separators_for_language(language: Language) -> list[str]:
760760 " " ,
761761 "" ,
762762 ]
763+ if language == Language .MYSQL :
764+ return [
765+ # Split along definitions
766+ "\n create " ,
767+ "\n CREATE " ,
768+ "\n alter " ,
769+ "\n ALTER " ,
770+ "\n drop " ,
771+ "\n DROP " ,
772+ "\n truncate " ,
773+ "\n TRUNCATE " ,
774+ "\n rename " ,
775+ "\n RENAME " ,
776+ "\n use " ,
777+ "\n USE " ,
778+ "\n desc " ,
779+ "\n DESC " ,
780+ "\n describe " ,
781+ "\n DESCRIBE " ,
782+ # Split along control flow and procedural code
783+ "\n begin" ,
784+ "\n BEGIN" ,
785+ "\n loop " ,
786+ "\n LOOP " ,
787+ "\n if " ,
788+ "\n IF " ,
789+ "\n while " ,
790+ "\n WHILE " ,
791+ "\n else " ,
792+ "\n ELSE " ,
793+ "\n elseif " ,
794+ "\n ELSEIF " ,
795+ "\n repeat " ,
796+ "\n REPEAT " ,
797+ "\n handler " ,
798+ "\n HANDLER " ,
799+ # Split along data manipulation statements
800+ "\n select " ,
801+ "\n SELECT " ,
802+ "\n insert " ,
803+ "\n INSERT " ,
804+ "\n update " ,
805+ "\n UPDATE " ,
806+ "\n delete " ,
807+ "\n DELETE " ,
808+ "\n replace " ,
809+ "\n REPLACE " ,
810+ "\n with " ,
811+ "\n WITH " ,
812+ "\n show " ,
813+ "\n SHOW " ,
814+ "\n explain " ,
815+ "\n EXPLAIN " ,
816+ "\n call " ,
817+ "\n CALL " ,
818+ # Split along permissions and transactions
819+ "\n grant " ,
820+ "\n GRANT " ,
821+ "\n revoke " ,
822+ "\n REVOKE " ,
823+ "\n commit " ,
824+ "\n COMMIT " ,
825+ "\n rollback " ,
826+ "\n ROLLBACK " ,
827+ "\n start transaction" ,
828+ "\n START TRANSACTION" ,
829+ "\n set autocommit" ,
830+ "\n SET AUTOCOMMIT" ,
831+ "\n DELIMITER " ,
832+ "\n delimiter " ,
833+ # Split by the normal type of lines
834+ "\n \n " ,
835+ "\n " ,
836+ " " ,
837+ "" ,
838+ ]
763839
764840 if language in Language ._value2member_map_ :
765841 msg = f"Language { language } is not implemented yet!"
Original file line number Diff line number Diff line change @@ -3277,6 +3277,45 @@ def test_visualbasic6_code_splitter() -> None:
32773277 ]
32783278
32793279
3280+ def test_mysql_code_splitter () -> None :
3281+ splitter = RecursiveCharacterTextSplitter .from_language (
3282+ Language .MYSQL ,
3283+ chunk_size = CHUNK_SIZE ,
3284+ chunk_overlap = 0 ,
3285+ )
3286+ code = """
3287+ CREATE TABLE products (
3288+ id INT PRIMARY KEY,
3289+ name VARCHAR(100)
3290+ );
3291+ INSERT INTO products VALUES (1, 'Keyboard'), (2, 'Mouse');
3292+ SELECT * FROM products WHERE id = 1;
3293+ SELECT name FROM products ORDER BY name DESC;
3294+ """
3295+ chunks = splitter .split_text (code )
3296+ assert chunks == [
3297+ "CREATE TABLE" ,
3298+ "products (" ,
3299+ "id INT" ,
3300+ "PRIMARY KEY," ,
3301+ "name" ,
3302+ "VARCHAR(100)" ,
3303+ ");" ,
3304+ "INSERT INTO" ,
3305+ "products VALUES" ,
3306+ "(1," ,
3307+ "'Keyboard')," ,
3308+ "(2, 'Mouse');" ,
3309+ "SELECT * FROM" ,
3310+ "products WHERE" ,
3311+ "id = 1;" ,
3312+ "SELECT name" ,
3313+ "FROM products" ,
3314+ "ORDER BY name" ,
3315+ "DESC;" ,
3316+ ]
3317+
3318+
32803319def custom_iframe_extractor (iframe_tag : Tag ) -> str :
32813320 iframe_src = iframe_tag .get ("src" , "" )
32823321 return f"[iframe:{ iframe_src } ]({ iframe_src } )"
You can’t perform that action at this time.
0 commit comments