Search Results

Search found 4803 results on 193 pages for 'eclipse pdt'.

Page 193/193 | < Previous Page | 189 190 191 192 193 

  • Android App crashing on Back Button (performResumeActivity)

    - by Rutger
    My App consists of 2 Activities at the moment. . the MAIN activity with a Gallery View . a FriendsListActivity with a ListView When the user moves away from the FriendsListActivity with the back button, and returns to the MAIN activity the following error keeps popping up in debug mode. DalvikVM[localhost:8676] Thread [<1 main] (Suspended (exception RuntimeException)) ActivityThread.performResumeActivity(IBinder, boolean) line: 2095 ActivityThread.handleResumeActivity(IBinder, boolean, boolean) line: 2110 BinderProxy(ActivityThread$H).handleMessage(Message) line: 954 ActivityThread$H(Handler).dispatchMessage(Message) line: 99 Looper.loop() line: 123 ActivityThread.main(String[]) line: 3647 Method.invokeNative(Object, Object[], Class, Class[], Class, int, boolean) line: not available [native method] Method.invoke(Object, Object...) line: 507 ZygoteInit$MethodAndArgsCaller.run() line: 839 ZygoteInit.main(String[]) line: 597 NativeStart.main(String[]) line: not available [native method] Thread [<8 Binder Thread #2] (Running) Thread [<7 Binder Thread #1] (Running) With the LogCat 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): FATAL EXCEPTION: main 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): java.lang.RuntimeException: Unable to resume activity {com.package.MAIN/com.package.MAIN.MAIN}: java.lang.NullPointerException 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at android.app.ActivityThread.performResumeActivity(ActivityThread.java:2095) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at android.app.ActivityThread.handleResumeActivity(ActivityThread.java:2110) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at android.app.ActivityThread$H.handleMessage(ActivityThread.java:954) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at android.os.Handler.dispatchMessage(Handler.java:99) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at android.os.Looper.loop(Looper.java:123) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at android.app.ActivityThread.main(ActivityThread.java:3647) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at java.lang.reflect.Method.invokeNative(Native Method) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at java.lang.reflect.Method.invoke(Method.java:507) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at com.android.internal.os.ZygoteInit$MethodAndArgsCaller.run(ZygoteInit.java:839) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:597) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at dalvik.system.NativeStart.main(Native Method) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): Caused by: java.lang.NullPointerException 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at com.package.MAIN.MAIN.onResume(MAIN.java:91) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at android.app.Instrumentation.callActivityOnResume(Instrumentation.java:1149) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at android.app.Activity.performResume(Activity.java:3833) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): at android.app.ActivityThread.performResumeActivity(ActivityThread.java:2085) 03-13 22:01:10.972: ERROR/AndroidRuntime(1038): ... 10 more Further info in the Variables panel states: this: ActivityThread e: NullPointerException cause: NullPointerException detailMessage: null stackTrace: null r: ActivityThread$ActivityClientRecord activity: MAIN detailMessage after one Eclipse Resume: Unable to resume activity (MAIN) The code from the FriendsListActivity looks like this public class FriendsListActivity extends ListActivity { // =========================================================== // Fields // =========================================================== private ArrayList<Friend> friends = new ArrayList<Friend>(); private FriendsArrayAdapter friendsArrayAdapter; private ListView listView; // =========================================================== // onCreate // =========================================================== @Override public void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); requestWindowFeature(Window.FEATURE_NO_TITLE); setContentView(R.layout.friends_list); registerForContextMenu(getListView()); setButtonNewFriendClickListener(); } public void generateFriendsList() { FriendsService fs = new FriendsService(this); friends = fs.getFriendsList(); listView = (ListView) findViewById(android.R.id.list); friendsArrayAdapter = new FriendsArrayAdapter( this, R.layout.friend_list_item, friends); listView.setAdapter(friendsArrayAdapter); } @Override public void onCreateContextMenu(ContextMenu menu, View v, ContextMenuInfo menuInfo) { super.onCreateContextMenu(menu, v, menuInfo); MenuInflater inflater = getMenuInflater(); inflater.inflate(R.menu.friends_context_menu, menu); } @Override public boolean onContextItemSelected(MenuItem item) { AdapterContextMenuInfo info = (AdapterContextMenuInfo) item.getMenuInfo(); FriendsService fs = new FriendsService(this); Friend f = new Friend(); f = friends.get(info.position); switch (item.getItemId()) { case R.id.edit: Intent i = new Intent(this, FriendEditActivity.class); i.putExtra("userid", f.userId); startActivity(i); return true; case R.id.delete: fs.deleteFriend(f.userId); generateFriendsList(); return true; default: return super.onContextItemSelected(item); } } // =========================================================== // onPause // =========================================================== protected void onPause() { super.onPause(); finish(); } // =========================================================== // onResume // =========================================================== protected void onResume() { super.onResume(); generateFriendsList(); } // =========================================================== // onStop // =========================================================== protected void onStop() { super.onStop(); } // =========================================================== // onDestroy // =========================================================== @Override protected void onDestroy() { super.onDestroy(); } // =========================================================== // Activity methods // =========================================================== private void setButtonNewFriendClickListener() { Button clickButton = (Button)findViewById(R.id.button_add_friend); clickButton.setOnClickListener(new View.OnClickListener() { public void onClick(View v) { Intent i = new Intent(v.getContext(), FriendNewActivity.class); startActivity(i); } }); } The AndroidManifest looks like this <manifest xmlns:android="http://schemas.android.com/apk/res/android" android:versionCode="1" android:versionName="1.0" package="com.package.mypackage"> <uses-permission android:name="android.permission.CAMERA" /> <uses-permission android:name="android.permission.INTERNET" /> <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" /> <application android:icon="@drawable/icon" android:label="@string/app_name"> <activity android:name="com.package.mypackage.mypackage" android:label="@string/app_name"> <intent-filter> <action android:name="android.intent.action.MAIN" /> <category android:name="android.intent.category.LAUNCHER" /> </intent-filter> </activity> <activity android:name=".FriendsListActivity"></activity> <activity android:name=".FriendEditActivity"></activity> <activity android:name=".FriendNewActivity"></activity> <activity android:name=".TakePictureActivity"></activity> <activity android:name=".FriendsService"></activity> <activity android:name=".MyService"></activity> </application> <uses-sdk android:minSdkVersion="9" /> The MAIN activity looks like this: package com.package.mypackage; import java.util.ArrayList; import com.package.domain.Domain; import com.package.service.MyService; import com.package.viewadapter.myImageAdapter; import android.app.Activity; import android.content.Intent; import android.os.Bundle; import android.view.View; import android.view.Window; import android.widget.AdapterView; import android.widget.AdapterView.OnItemClickListener; import android.widget.AdapterView.OnItemLongClickListener; import android.widget.Button; import android.widget.Gallery; import android.widget.Toast; public class myActivity extends Activity { // =========================================================== // Fields // =========================================================== private MyImageAdapter myImageAdapter; private ArrayList<Domain> domain = new ArrayList<Domain>(); // =========================================================== // onCreate // =========================================================== @Override public void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); requestWindowFeature(Window.FEATURE_NO_TITLE); setContentView(R.layout.main); /* Set Buttons to listen for any click event. */ setButtonFriendsClickListener(); setButtonCameraClickListener(); setButtonPreferencesClickListener(); } // =========================================================== // onStart // =========================================================== @Override public void onStart() { super.onStart(); /* Find the gallery defined in the main.xml */ Gallery g = (Gallery) findViewById(R.id.gallery); /* Show a Toast message when image is clicked */ g.setOnItemClickListener(new OnItemClickListener() { @Override public void onItemClick(AdapterView<?> parent, View v, int position, long id) { MyImageAdapter image_ID = new MyImageAdapter(myActivity.this, position, null); if (image_ID.getItemId(position) == 0) { Toast test_toast = Toast.makeText(myActivity.this, "This is the New Image click", Toast.LENGTH_SHORT); test_toast.show(); } else { Toast test_toast = Toast.makeText(myActivity.this, "The clicked image has image number " + image_ID.getItemId(position) + " in the imageadapter.", Toast.LENGTH_SHORT); test_toast.show(); } } }); g.setOnItemLongClickListener(new OnItemLongClickListener() { @Override public boolean onItemLongClick(AdapterView<?> parent, View v, int position, long id) { MyImageAdapter image_ID = new MyImageAdapter(myActivity.this, position, null); Toast test_toast = Toast.makeText(myActivity.this, "The long clicked image has image number " + image_ID.getItemId(position) + " in the imageadapter.", Toast.LENGTH_SHORT); test_toast.show(); return true; } }); } // =========================================================== // onPause // =========================================================== protected void onPause() { super.onPause(); } // =========================================================== // onResume // =========================================================== protected void onResume() { super.onResume(); generateMyGallery(); } // =========================================================== // onStop // =========================================================== protected void onStop() { super.onStop(); } // =========================================================== // onDestroy // Is also called when user changes from horizontal // to vertical orientation and back // =========================================================== @Override protected void onDestroy() { super.onDestroy(); } // =========================================================== // Save and Restore UI states // =========================================================== @Override protected void onRestoreInstanceState(Bundle savedInstanceState) { super.onRestoreInstanceState(savedInstanceState); } protected void onSaveInstanceState(Bundle outState) { super.onSaveInstanceState(outState); } // =========================================================== // Main Activity methods // =========================================================== public void generateMyGallery() { MyService cs = new MyService(this); domain = cs.getDomainList(); // Add the new_image drawable to the ArrayList Domain d = new Domain(); d.photoLocation = "drawable"; d.photoName = "new_image"; d.extra1 = "no_text"; d.extra2 = "no_text"; domain.add(0, d); myImageAdapter = new MyImageAdapter(this, R.layout.text_overlay_image_view, domain); /* Find the gallery defined in the main.xml */ Gallery g = (Gallery) findViewById(R.id.gallery); g.setSpacing(10); /* Apply a new (custom) ImageAdapter to it. */ g.setAdapter(myImageAdapter); g.setSelection(1); } private void setButtonFriendsClickListener() { Button clickButton = (Button)findViewById(R.id.button_friends_list); clickButton.setOnClickListener(new View.OnClickListener() { public void onClick(View v) { setContentView(R.layout.friends_list); Intent myIntent = new Intent(v.getContext(), FriendsListActivity.class); startActivity(myIntent); } }); } private void setButtonCameraClickListener() { Button clickButton = (Button)findViewById(R.id.button_take_picture); clickButton.setOnClickListener(new View.OnClickListener() { public void onClick(View v) { setContentView(R.layout.take_picture); Intent myIntent = new Intent(v.getContext(), TakePictureActivity.class); startActivity(myIntent); } }); } private void setButtonPreferencesClickListener() { Button clickButton = (Button)findViewById(R.id.button_preferences); clickButton.setOnClickListener(new View.OnClickListener() { public void onClick(View v) { Intent myIntent = new Intent(v.getContext(), MyPreferencesActivity.class); startActivity(myIntent); } }); } }; Anyone has an idea why the App crashes. Any help is much appreciated. I did find out that, when I finish() the MAIN activity when I start the FriendListActivity and restart the MAIN activity when closing the FriendListActivity, that the crash does not occur. However, this basically restarts the App and that is not the intention. Thanks all, I got the problem solved. This is what I did. . When moving all gallery related actions to the onCreate, the creash didn't happen anymore. But then after returning to the MAIN activity, the activity wasn't shown. . I then moved all the onCreate (except the super), the generateGallery, and the onStart() to the onResume. Now it works fine!

    Read the article

  • SQLite, python, unicode, and non-utf data

    - by Nathan Spears
    I started by trying to store strings in sqlite using python, and got the message: sqlite3.ProgrammingError: You must not use 8-bit bytestrings unless you use a text_factory that can interpret 8-bit bytestrings (like text_factory = str). It is highly recommended that you instead just switch your application to Unicode strings. Ok, I switched to Unicode strings. Then I started getting the message: sqlite3.OperationalError: Could not decode to UTF-8 column 'tag_artist' with text 'Sigur Rós' when trying to retrieve data from the db. More research and I started encoding it in utf8, but then 'Sigur Rós' starts looking like 'Sigur Rós' note: My console was set to display in 'latin_1' as @John Machin pointed out. What gives? After reading this, describing exactly the same situation I'm in, it seems as if the advice is to ignore the other advice and use 8-bit bytestrings after all. I didn't know much about unicode and utf before I started this process. I've learned quite a bit in the last couple hours, but I'm still ignorant of whether there is a way to correctly convert 'ó' from latin-1 to utf-8 and not mangle it. If there isn't, why would sqlite 'highly recommend' I switch my application to unicode strings? I'm going to update this question with a summary and some example code of everything I've learned in the last 24 hours so that someone in my shoes can have an easy(er) guide. If the information I post is wrong or misleading in any way please tell me and I'll update, or one of you senior guys can update. Summary of answers Let me first state the goal as I understand it. The goal in processing various encodings, if you are trying to convert between them, is to understand what your source encoding is, then convert it to unicode using that source encoding, then convert it to your desired encoding. Unicode is a base and encodings are mappings of subsets of that base. utf_8 has room for every character in unicode, but because they aren't in the same place as, for instance, latin_1, a string encoded in utf_8 and sent to a latin_1 console will not look the way you expect. In python the process of getting to unicode and into another encoding looks like: str.decode('source_encoding').encode('desired_encoding') or if the str is already in unicode str.encode('desired_encoding') For sqlite I didn't actually want to encode it again, I wanted to decode it and leave it in unicode format. Here are four things you might need to be aware of as you try to work with unicode and encodings in python. The encoding of the string you want to work with, and the encoding you want to get it to. The system encoding. The console encoding. The encoding of the source file Elaboration: (1) When you read a string from a source, it must have some encoding, like latin_1 or utf_8. In my case, I'm getting strings from filenames, so unfortunately, I could be getting any kind of encoding. Windows XP uses UCS-2 (a Unicode system) as its native string type, which seems like cheating to me. Fortunately for me, the characters in most filenames are not going to be made up of more than one source encoding type, and I think all of mine were either completely latin_1, completely utf_8, or just plain ascii (which is a subset of both of those). So I just read them and decoded them as if they were still in latin_1 or utf_8. It's possible, though, that you could have latin_1 and utf_8 and whatever other characters mixed together in a filename on Windows. Sometimes those characters can show up as boxes, other times they just look mangled, and other times they look correct (accented characters and whatnot). Moving on. (2) Python has a default system encoding that gets set when python starts and can't be changed during runtime. See here for details. Dirty summary ... well here's the file I added: \# sitecustomize.py \# this file can be anywhere in your Python path, \# but it usually goes in ${pythondir}/lib/site-packages/ import sys sys.setdefaultencoding('utf_8') This system encoding is the one that gets used when you use the unicode("str") function without any other encoding parameters. To say that another way, python tries to decode "str" to unicode based on the default system encoding. (3) If you're using IDLE or the command-line python, I think that your console will display according to the default system encoding. I am using pydev with eclipse for some reason, so I had to go into my project settings, edit the launch configuration properties of my test script, go to the Common tab, and change the console from latin-1 to utf-8 so that I could visually confirm what I was doing was working. (4) If you want to have some test strings, eg test_str = "ó" in your source code, then you will have to tell python what kind of encoding you are using in that file. (FYI: when I mistyped an encoding I had to ctrl-Z because my file became unreadable.) This is easily accomplished by putting a line like so at the top of your source code file: # -*- coding: utf_8 -*- If you don't have this information, python attempts to parse your code as ascii by default, and so: SyntaxError: Non-ASCII character '\xf3' in file _redacted_ on line 81, but no encoding declared; see http://www.python.org/peps/pep-0263.html for details Once your program is working correctly, or, if you aren't using python's console or any other console to look at output, then you will probably really only care about #1 on the list. System default and console encoding are not that important unless you need to look at output and/or you are using the builtin unicode() function (without any encoding parameters) instead of the string.decode() function. I wrote a demo function I will paste into the bottom of this gigantic mess that I hope correctly demonstrates the items in my list. Here is some of the output when I run the character 'ó' through the demo function, showing how various methods react to the character as input. My system encoding and console output are both set to utf_8 for this run: '?' = original char <type 'str'> repr(char)='\xf3' '?' = unicode(char) ERROR: 'utf8' codec can't decode byte 0xf3 in position 0: unexpected end of data 'ó' = char.decode('latin_1') <type 'unicode'> repr(char.decode('latin_1'))=u'\xf3' '?' = char.decode('utf_8') ERROR: 'utf8' codec can't decode byte 0xf3 in position 0: unexpected end of data Now I will change the system and console encoding to latin_1, and I get this output for the same input: 'ó' = original char <type 'str'> repr(char)='\xf3' 'ó' = unicode(char) <type 'unicode'> repr(unicode(char))=u'\xf3' 'ó' = char.decode('latin_1') <type 'unicode'> repr(char.decode('latin_1'))=u'\xf3' '?' = char.decode('utf_8') ERROR: 'utf8' codec can't decode byte 0xf3 in position 0: unexpected end of data Notice that the 'original' character displays correctly and the builtin unicode() function works now. Now I change my console output back to utf_8. '?' = original char <type 'str'> repr(char)='\xf3' '?' = unicode(char) <type 'unicode'> repr(unicode(char))=u'\xf3' '?' = char.decode('latin_1') <type 'unicode'> repr(char.decode('latin_1'))=u'\xf3' '?' = char.decode('utf_8') ERROR: 'utf8' codec can't decode byte 0xf3 in position 0: unexpected end of data Here everything still works the same as last time but the console can't display the output correctly. Etc. The function below also displays more information that this and hopefully would help someone figure out where the gap in their understanding is. I know all this information is in other places and more thoroughly dealt with there, but I hope that this would be a good kickoff point for someone trying to get coding with python and/or sqlite. Ideas are great but sometimes source code can save you a day or two of trying to figure out what functions do what. Disclaimers: I'm no encoding expert, I put this together to help my own understanding. I kept building on it when I should have probably started passing functions as arguments to avoid so much redundant code, so if I can I'll make it more concise. Also, utf_8 and latin_1 are by no means the only encoding schemes, they are just the two I was playing around with because I think they handle everything I need. Add your own encoding schemes to the demo function and test your own input. One more thing: there are apparently crazy application developers making life difficult in Windows. #!/usr/bin/env python # -*- coding: utf_8 -*- import os import sys def encodingDemo(str): validStrings = () try: print "str =",str,"{0} repr(str) = {1}".format(type(str), repr(str)) validStrings += ((str,""),) except UnicodeEncodeError as ude: print "Couldn't print the str itself because the console is set to an encoding that doesn't understand some character in the string. See error:\n\t", print ude try: x = unicode(str) print "unicode(str) = ",x validStrings+= ((x, " decoded into unicode by the default system encoding"),) except UnicodeDecodeError as ude: print "ERROR. unicode(str) couldn't decode the string because the system encoding is set to an encoding that doesn't understand some character in the string." print "\tThe system encoding is set to {0}. See error:\n\t".format(sys.getdefaultencoding()), print ude except UnicodeEncodeError as uee: print "ERROR. Couldn't print the unicode(str) because the console is set to an encoding that doesn't understand some character in the string. See error:\n\t", print uee try: x = str.decode('latin_1') print "str.decode('latin_1') =",x validStrings+= ((x, " decoded with latin_1 into unicode"),) try: print "str.decode('latin_1').encode('utf_8') =",str.decode('latin_1').encode('utf_8') validStrings+= ((x, " decoded with latin_1 into unicode and encoded into utf_8"),) except UnicodeDecodeError as ude: print "The string was decoded into unicode using the latin_1 encoding, but couldn't be encoded into utf_8. See error:\n\t", print ude except UnicodeDecodeError as ude: print "Something didn't work, probably because the string wasn't latin_1 encoded. See error:\n\t", print ude except UnicodeEncodeError as uee: print "ERROR. Couldn't print the str.decode('latin_1') because the console is set to an encoding that doesn't understand some character in the string. See error:\n\t", print uee try: x = str.decode('utf_8') print "str.decode('utf_8') =",x validStrings+= ((x, " decoded with utf_8 into unicode"),) try: print "str.decode('utf_8').encode('latin_1') =",str.decode('utf_8').encode('latin_1') except UnicodeDecodeError as ude: print "str.decode('utf_8').encode('latin_1') didn't work. The string was decoded into unicode using the utf_8 encoding, but couldn't be encoded into latin_1. See error:\n\t", validStrings+= ((x, " decoded with utf_8 into unicode and encoded into latin_1"),) print ude except UnicodeDecodeError as ude: print "str.decode('utf_8') didn't work, probably because the string wasn't utf_8 encoded. See error:\n\t", print ude except UnicodeEncodeError as uee: print "ERROR. Couldn't print the str.decode('utf_8') because the console is set to an encoding that doesn't understand some character in the string. See error:\n\t",uee print print "Printing information about each character in the original string." for char in str: try: print "\t'" + char + "' = original char {0} repr(char)={1}".format(type(char), repr(char)) except UnicodeDecodeError as ude: print "\t'?' = original char {0} repr(char)={1} ERROR PRINTING: {2}".format(type(char), repr(char), ude) except UnicodeEncodeError as uee: print "\t'?' = original char {0} repr(char)={1} ERROR PRINTING: {2}".format(type(char), repr(char), uee) print uee try: x = unicode(char) print "\t'" + x + "' = unicode(char) {1} repr(unicode(char))={2}".format(x, type(x), repr(x)) except UnicodeDecodeError as ude: print "\t'?' = unicode(char) ERROR: {0}".format(ude) except UnicodeEncodeError as uee: print "\t'?' = unicode(char) {0} repr(char)={1} ERROR PRINTING: {2}".format(type(x), repr(x), uee) try: x = char.decode('latin_1') print "\t'" + x + "' = char.decode('latin_1') {1} repr(char.decode('latin_1'))={2}".format(x, type(x), repr(x)) except UnicodeDecodeError as ude: print "\t'?' = char.decode('latin_1') ERROR: {0}".format(ude) except UnicodeEncodeError as uee: print "\t'?' = char.decode('latin_1') {0} repr(char)={1} ERROR PRINTING: {2}".format(type(x), repr(x), uee) try: x = char.decode('utf_8') print "\t'" + x + "' = char.decode('utf_8') {1} repr(char.decode('utf_8'))={2}".format(x, type(x), repr(x)) except UnicodeDecodeError as ude: print "\t'?' = char.decode('utf_8') ERROR: {0}".format(ude) except UnicodeEncodeError as uee: print "\t'?' = char.decode('utf_8') {0} repr(char)={1} ERROR PRINTING: {2}".format(type(x), repr(x), uee) print x = 'ó' encodingDemo(x) Much thanks for the answers below and especially to @John Machin for answering so thoroughly.

    Read the article

  • SQLite, python, unicode, and non-utf data

    - by Nathan Spears
    I started by trying to store strings in sqlite using python, and got the message: sqlite3.ProgrammingError: You must not use 8-bit bytestrings unless you use a text_factory that can interpret 8-bit bytestrings (like text_factory = str). It is highly recommended that you instead just switch your application to Unicode strings. Ok, I switched to Unicode strings. Then I started getting the message: sqlite3.OperationalError: Could not decode to UTF-8 column 'tag_artist' with text 'Sigur Rós' when trying to retrieve data from the db. More research and I started encoding it in utf8, but then 'Sigur Rós' starts looking like 'Sigur Rós' note: My console was set to display in 'latin_1' as @John Machin pointed out. What gives? After reading this, describing exactly the same situation I'm in, it seems as if the advice is to ignore the other advice and use 8-bit bytestrings after all. I didn't know much about unicode and utf before I started this process. I've learned quite a bit in the last couple hours, but I'm still ignorant of whether there is a way to correctly convert 'ó' from latin-1 to utf-8 and not mangle it. If there isn't, why would sqlite 'highly recommend' I switch my application to unicode strings? I'm going to update this question with a summary and some example code of everything I've learned in the last 24 hours so that someone in my shoes can have an easy(er) guide. If the information I post is wrong or misleading in any way please tell me and I'll update, or one of you senior guys can update. Summary of answers Let me first state the goal as I understand it. The goal in processing various encodings, if you are trying to convert between them, is to understand what your source encoding is, then convert it to unicode using that source encoding, then convert it to your desired encoding. Unicode is a base and encodings are mappings of subsets of that base. utf_8 has room for every character in unicode, but because they aren't in the same place as, for instance, latin_1, a string encoded in utf_8 and sent to a latin_1 console will not look the way you expect. In python the process of getting to unicode and into another encoding looks like: str.decode('source_encoding').encode('desired_encoding') or if the str is already in unicode str.encode('desired_encoding') For sqlite I didn't actually want to encode it again, I wanted to decode it and leave it in unicode format. Here are four things you might need to be aware of as you try to work with unicode and encodings in python. The encoding of the string you want to work with, and the encoding you want to get it to. The system encoding. The console encoding. The encoding of the source file Elaboration: (1) When you read a string from a source, it must have some encoding, like latin_1 or utf_8. In my case, I'm getting strings from filenames, so unfortunately, I could be getting any kind of encoding. Windows XP uses UCS-2 (a Unicode system) as its native string type, which seems like cheating to me. Fortunately for me, the characters in most filenames are not going to be made up of more than one source encoding type, and I think all of mine were either completely latin_1, completely utf_8, or just plain ascii (which is a subset of both of those). So I just read them and decoded them as if they were still in latin_1 or utf_8. It's possible, though, that you could have latin_1 and utf_8 and whatever other characters mixed together in a filename on Windows. Sometimes those characters can show up as boxes, other times they just look mangled, and other times they look correct (accented characters and whatnot). Moving on. (2) Python has a default system encoding that gets set when python starts and can't be changed during runtime. See here for details. Dirty summary ... well here's the file I added: \# sitecustomize.py \# this file can be anywhere in your Python path, \# but it usually goes in ${pythondir}/lib/site-packages/ import sys sys.setdefaultencoding('utf_8') This system encoding is the one that gets used when you use the unicode("str") function without any other encoding parameters. To say that another way, python tries to decode "str" to unicode based on the default system encoding. (3) If you're using IDLE or the command-line python, I think that your console will display according to the default system encoding. I am using pydev with eclipse for some reason, so I had to go into my project settings, edit the launch configuration properties of my test script, go to the Common tab, and change the console from latin-1 to utf-8 so that I could visually confirm what I was doing was working. (4) If you want to have some test strings, eg test_str = "ó" in your source code, then you will have to tell python what kind of encoding you are using in that file. (FYI: when I mistyped an encoding I had to ctrl-Z because my file became unreadable.) This is easily accomplished by putting a line like so at the top of your source code file: # -*- coding: utf_8 -*- If you don't have this information, python attempts to parse your code as ascii by default, and so: SyntaxError: Non-ASCII character '\xf3' in file _redacted_ on line 81, but no encoding declared; see http://www.python.org/peps/pep-0263.html for details Once your program is working correctly, or, if you aren't using python's console or any other console to look at output, then you will probably really only care about #1 on the list. System default and console encoding are not that important unless you need to look at output and/or you are using the builtin unicode() function (without any encoding parameters) instead of the string.decode() function. I wrote a demo function I will paste into the bottom of this gigantic mess that I hope correctly demonstrates the items in my list. Here is some of the output when I run the character 'ó' through the demo function, showing how various methods react to the character as input. My system encoding and console output are both set to utf_8 for this run: '?' = original char <type 'str'> repr(char)='\xf3' '?' = unicode(char) ERROR: 'utf8' codec can't decode byte 0xf3 in position 0: unexpected end of data 'ó' = char.decode('latin_1') <type 'unicode'> repr(char.decode('latin_1'))=u'\xf3' '?' = char.decode('utf_8') ERROR: 'utf8' codec can't decode byte 0xf3 in position 0: unexpected end of data Now I will change the system and console encoding to latin_1, and I get this output for the same input: 'ó' = original char <type 'str'> repr(char)='\xf3' 'ó' = unicode(char) <type 'unicode'> repr(unicode(char))=u'\xf3' 'ó' = char.decode('latin_1') <type 'unicode'> repr(char.decode('latin_1'))=u'\xf3' '?' = char.decode('utf_8') ERROR: 'utf8' codec can't decode byte 0xf3 in position 0: unexpected end of data Notice that the 'original' character displays correctly and the builtin unicode() function works now. Now I change my console output back to utf_8. '?' = original char <type 'str'> repr(char)='\xf3' '?' = unicode(char) <type 'unicode'> repr(unicode(char))=u'\xf3' '?' = char.decode('latin_1') <type 'unicode'> repr(char.decode('latin_1'))=u'\xf3' '?' = char.decode('utf_8') ERROR: 'utf8' codec can't decode byte 0xf3 in position 0: unexpected end of data Here everything still works the same as last time but the console can't display the output correctly. Etc. The function below also displays more information that this and hopefully would help someone figure out where the gap in their understanding is. I know all this information is in other places and more thoroughly dealt with there, but I hope that this would be a good kickoff point for someone trying to get coding with python and/or sqlite. Ideas are great but sometimes source code can save you a day or two of trying to figure out what functions do what. Disclaimers: I'm no encoding expert, I put this together to help my own understanding. I kept building on it when I should have probably started passing functions as arguments to avoid so much redundant code, so if I can I'll make it more concise. Also, utf_8 and latin_1 are by no means the only encoding schemes, they are just the two I was playing around with because I think they handle everything I need. Add your own encoding schemes to the demo function and test your own input. One more thing: there are apparently crazy application developers making life difficult in Windows. #!/usr/bin/env python # -*- coding: utf_8 -*- import os import sys def encodingDemo(str): validStrings = () try: print "str =",str,"{0} repr(str) = {1}".format(type(str), repr(str)) validStrings += ((str,""),) except UnicodeEncodeError as ude: print "Couldn't print the str itself because the console is set to an encoding that doesn't understand some character in the string. See error:\n\t", print ude try: x = unicode(str) print "unicode(str) = ",x validStrings+= ((x, " decoded into unicode by the default system encoding"),) except UnicodeDecodeError as ude: print "ERROR. unicode(str) couldn't decode the string because the system encoding is set to an encoding that doesn't understand some character in the string." print "\tThe system encoding is set to {0}. See error:\n\t".format(sys.getdefaultencoding()), print ude except UnicodeEncodeError as uee: print "ERROR. Couldn't print the unicode(str) because the console is set to an encoding that doesn't understand some character in the string. See error:\n\t", print uee try: x = str.decode('latin_1') print "str.decode('latin_1') =",x validStrings+= ((x, " decoded with latin_1 into unicode"),) try: print "str.decode('latin_1').encode('utf_8') =",str.decode('latin_1').encode('utf_8') validStrings+= ((x, " decoded with latin_1 into unicode and encoded into utf_8"),) except UnicodeDecodeError as ude: print "The string was decoded into unicode using the latin_1 encoding, but couldn't be encoded into utf_8. See error:\n\t", print ude except UnicodeDecodeError as ude: print "Something didn't work, probably because the string wasn't latin_1 encoded. See error:\n\t", print ude except UnicodeEncodeError as uee: print "ERROR. Couldn't print the str.decode('latin_1') because the console is set to an encoding that doesn't understand some character in the string. See error:\n\t", print uee try: x = str.decode('utf_8') print "str.decode('utf_8') =",x validStrings+= ((x, " decoded with utf_8 into unicode"),) try: print "str.decode('utf_8').encode('latin_1') =",str.decode('utf_8').encode('latin_1') except UnicodeDecodeError as ude: print "str.decode('utf_8').encode('latin_1') didn't work. The string was decoded into unicode using the utf_8 encoding, but couldn't be encoded into latin_1. See error:\n\t", validStrings+= ((x, " decoded with utf_8 into unicode and encoded into latin_1"),) print ude except UnicodeDecodeError as ude: print "str.decode('utf_8') didn't work, probably because the string wasn't utf_8 encoded. See error:\n\t", print ude except UnicodeEncodeError as uee: print "ERROR. Couldn't print the str.decode('utf_8') because the console is set to an encoding that doesn't understand some character in the string. See error:\n\t",uee print print "Printing information about each character in the original string." for char in str: try: print "\t'" + char + "' = original char {0} repr(char)={1}".format(type(char), repr(char)) except UnicodeDecodeError as ude: print "\t'?' = original char {0} repr(char)={1} ERROR PRINTING: {2}".format(type(char), repr(char), ude) except UnicodeEncodeError as uee: print "\t'?' = original char {0} repr(char)={1} ERROR PRINTING: {2}".format(type(char), repr(char), uee) print uee try: x = unicode(char) print "\t'" + x + "' = unicode(char) {1} repr(unicode(char))={2}".format(x, type(x), repr(x)) except UnicodeDecodeError as ude: print "\t'?' = unicode(char) ERROR: {0}".format(ude) except UnicodeEncodeError as uee: print "\t'?' = unicode(char) {0} repr(char)={1} ERROR PRINTING: {2}".format(type(x), repr(x), uee) try: x = char.decode('latin_1') print "\t'" + x + "' = char.decode('latin_1') {1} repr(char.decode('latin_1'))={2}".format(x, type(x), repr(x)) except UnicodeDecodeError as ude: print "\t'?' = char.decode('latin_1') ERROR: {0}".format(ude) except UnicodeEncodeError as uee: print "\t'?' = char.decode('latin_1') {0} repr(char)={1} ERROR PRINTING: {2}".format(type(x), repr(x), uee) try: x = char.decode('utf_8') print "\t'" + x + "' = char.decode('utf_8') {1} repr(char.decode('utf_8'))={2}".format(x, type(x), repr(x)) except UnicodeDecodeError as ude: print "\t'?' = char.decode('utf_8') ERROR: {0}".format(ude) except UnicodeEncodeError as uee: print "\t'?' = char.decode('utf_8') {0} repr(char)={1} ERROR PRINTING: {2}".format(type(x), repr(x), uee) print x = 'ó' encodingDemo(x) Much thanks for the answers below and especially to @John Machin for answering so thoroughly.

    Read the article

< Previous Page | 189 190 191 192 193